tech-kern archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
Re: Revisiting DTrace syscall provider
On Sat, Mar 7, 2015 at 6:10 AM, Christos Zoulas <christos%astron.com@localhost> wrote:
> In article <CAKrYomg6z8SCUvMZQmfk+n09n+1XfNiZnc4fAC59zxoALJocmg%mail.gmail.com@localhost>,
> Ryota Ozaki <ozaki-r%netbsd.org@localhost> wrote:
>>Anyway I updated my patches; they're based on latest -current.
>>
>>Changes since the previous are:
>>- Remove an unexpected contribution comment from kern_dtrace.c
>> (thanks riastradh!)
>>- Don't unload systrace.kmod when there are users using dtrace
>>- Add "created from" line to *_systrace_args.c
>>
>>http://www.netbsd.org/~ozaki-r/systrace.diff
>>http://www.netbsd.org/~ozaki-r/systrace-full.diff
>>https://github.com/ozaki-r/netbsd-src/commits/dtrace-syscall-provider2
>
> Thanks for working on this... I had done mostly the same, but with a few
> differences:
>
> 1. I named the module dtrace_systrace to match with the other
> dtrace modules.
I first did so but I changed back to systrace because FreeBSD named it
systrace and named modules for syscall emulations as systrace_linux32
and systrace_freebsd32. If we follow so, we should keep systrace name as is?
> 2. I did not create the args union to deal with signed/unsigned,
> chose to mimick what FreeBSD does. Do you prefer your way?
I don't mind if it works. It was just as riz did.
> 3. I fixed the entry/exit probe functions to deal with the return
> value of the syscall, as well as the entry and exit argument
> descriptions.
> 4. I am not sure if creating systrace_foo files in the emulations
> should be done right now, since we don't really load/use them.
> This should be a todo item, perhaps these functions should be
> the emulation structure and loaded with the emulation. But
> that should be phase II...
Yes I agree.
> 5. I bumped the kernel because of struct sysent changes.
Sure.
>
> Anyway it works nicely, and I'd like to commit it if you don't mind
> (or if you have any changes).
NP, please do it :)
ozaki-r
>
> Thanks,
>
> christos
>
> Index: kern/Makefile
> ===================================================================
> RCS file: /cvsroot/src/sys/kern/Makefile,v
> retrieving revision 1.17
> diff -u -u -r1.17 Makefile
> --- kern/Makefile 16 Jan 2014 01:15:34 -0000 1.17
> +++ kern/Makefile 6 Mar 2015 20:55:27 -0000
> @@ -11,7 +11,7 @@
> @false
>
> SYSCALLSRC = makesyscalls.sh syscalls.conf syscalls.master
> -init_sysent.c syscalls.c ../sys/syscall.h ../sys/syscallargs.h: ${SYSCALLSRC}
> +init_sysent.c syscalls.c systrace_args.c ../sys/syscall.h ../sys/syscallargs.h: ${SYSCALLSRC}
> ${HOST_SH} makesyscalls.sh syscalls.conf syscalls.master
>
> VNODEIFSRC = vnode_if.sh vnode_if.src
> Index: kern/makesyscalls.sh
> ===================================================================
> RCS file: /cvsroot/src/sys/kern/makesyscalls.sh,v
> retrieving revision 1.145
> diff -u -u -r1.145 makesyscalls.sh
> --- kern/makesyscalls.sh 24 Jul 2014 11:58:45 -0000 1.145
> +++ kern/makesyscalls.sh 6 Mar 2015 20:55:28 -0000
> @@ -61,6 +61,7 @@
> # source the config file.
> sys_nosys="sys_nosys" # default is sys_nosys(), if not specified otherwise
> maxsysargs=8 # default limit is 8 (32bit) arguments
> +systrace="/dev/null"
> rumpcalls="/dev/null"
> rumpcallshdr="/dev/null"
> rumpsysmap="/dev/null"
> @@ -75,15 +76,17 @@
> sysnamesbottom="sysnames.bottom"
> rumptypes="rumphdr.types"
> rumpprotos="rumphdr.protos"
> +systracetmp="systrace.$$"
> +systraceret="systraceret.$$"
>
> -trap "rm $sysdcl $sysprotos $sysent $sysnamesbottom $rumpsysent $rumptypes $rumpprotos" 0
> +trap "rm $sysdcl $sysprotos $sysent $sysnamesbottom $rumpsysent $rumptypes $rumpprotos $systracetmp $systraceret" 0
>
> # Awk program (must support nawk extensions)
> # Use "awk" at Berkeley, "nawk" or "gawk" elsewhere.
> awk=${AWK:-awk}
>
> # Does this awk have a "toupper" function?
> -have_toupper=`$awk 'BEGIN { print toupper("true"); exit; }' 2>/dev/null`
> +have_toupper="$($awk 'BEGIN { print toupper("true"); exit; }' 2>/dev/null)"
>
> # If this awk does not define "toupper" then define our own.
> if [ "$have_toupper" = TRUE ] ; then
> @@ -137,6 +140,9 @@
> sysnumhdr = \"$sysnumhdr\"
> sysarghdr = \"$sysarghdr\"
> sysarghdrextra = \"$sysarghdrextra\"
> + systrace = \"$systrace\"
> + systracetmp = \"$systracetmp\"
> + systraceret = \"$systraceret\"
> rumpcalls = \"$rumpcalls\"
> rumpcallshdr = \"$rumpcallshdr\"
> rumpsysent = \"$rumpsysent\"
> @@ -211,6 +217,10 @@
> printf "/* %s */\n\n", tag > rumpcallshdr
> printf "/*\n * System call protos in rump namespace.\n *\n" > rumpcallshdr
> printf " * DO NOT EDIT-- this file is automatically generated.\n" > rumpcallshdr
> +
> + printf "/* %s */\n\n", tag > systrace
> + printf "/*\n * System call argument to DTrace register array converstion.\n *\n" > systrace
> + printf " * DO NOT EDIT-- this file is automatically generated.\n" > systrace
> }
> NR == 1 {
> sub(/ $/, "")
> @@ -324,6 +334,17 @@
> "\t\t<= %sMAXSYSARGS * sizeof (%s) ? 1 : -1];\n", \
> constprefix, registertype) >sysarghdr
>
> + printf " * This file is part of the DTrace syscall provider.\n */\n\n" > systrace
> + printf "static void\nsystrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)\n{\n" > systrace
> + printf "\tint64_t *iarg = (int64_t *) uarg;\n" > systrace
> + printf "\tswitch (sysnum) {\n" > systrace
> +
> + printf "static void\nsystrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systracetmp
> + printf "\tswitch (sysnum) {\n" > systracetmp
> +
> + printf "static void\nsystrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)\n{\n\tconst char *p = NULL;\n" > systraceret
> + printf "\tswitch (sysnum) {\n" > systraceret
> +
> # compat types from syscalls.master. this is slightly ugly,
> # but given that we have so few compats from over 17 years,
> # a more complicated solution is not currently warranted.
> @@ -376,6 +397,9 @@
> print > sysnumhdr
> print > sysprotos
> print > sysnamesbottom
> + print > systrace
> + print > systracetmp
> + print > systraceret
>
> # XXX: technically we do not want to have conditionals in rump,
> # but it is easier to just let the cpp handle them than try to
> @@ -647,7 +671,7 @@
> eno[1] = "rumpns_sys_nomodule"
> flags[0] = "SYCALL_NOSYS"
> flags[1] = "0"
> - printf("\t{ 0, 0, %s,\n\t (sy_call_t *)%s }, \t" \
> + printf("\t{ 0, 0, %s,\n\t (sy_call_t *)%s, NULL, 0, 0 }, \t" \
> "/* %d = %s */\n", \
> flags[modular], eno[modular], syscall, funcalias) \
> > rumpsysent
> @@ -665,7 +689,7 @@
> fn="(sy_call_t *)rumpns_sys_nomodule"
> else
> fn="(sy_call_t *)rumpns_enosys"
> - printf("0,\n\t %s },", fn) > rumpsysent
> + printf("0,\n\t %s, NULL, 0, 0 },", fn) > rumpsysent
> for (i = 0; i < (33 - length(fn)) / 8; i++)
> printf("\t") > rumpsysent
> printf("/* %d = %s%s */\n", syscall, compatwrap_, funcalias) > rumpsysent
> @@ -697,6 +721,40 @@
> syscall, wfn, funcalias, rumpentry) > rumpsysmap
> }
>
> +function putsystrace(type, compatwrap_) {
> + printf("\t/* %s */\n\tcase %d: {\n", funcname, syscall) > systrace
> + printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systracetmp
> + printf("\t/* %s */\n\tcase %d:\n", funcname, syscall) > systraceret
> + if (argc > 0) {
> + printf("\t\tswitch(ndx) {\n") > systracetmp
> + printf("\t\tstruct %s%s_args *p = params;\n", compatwrap_, funcname) > systrace
> + for (i = 1; i <= argc; i++) {
> + arg = argtype[i]
> + sub("__restrict$", "", arg)
> + printf("\t\tcase %d:\n\t\t\tp = \"%s\";\n\t\t\tbreak;\n", i - 1, arg) > systracetmp
> + if (index(arg, "*") > 0 || arg == "caddr_t")
> + printf("\t\tuarg[%d] = (intptr_t) SCARG(p, %s); /* %s */\n", \
> + i - 1, \
> + argname[i], arg) > systrace
> + else if (substr(arg, 1, 1) == "u" || arg == "size_t")
> + printf("\t\tuarg[%d] = SCARG(p, %s); /* %s */\n", \
> + i - 1, \
> + argname[i], arg) > systrace
> + else
> + printf("\t\tiarg[%d] = SCARG(p, %s); /* %s */\n", \
> + i - 1, \
> + argname[i], arg) > systrace
> + }
> + printf("\t\tdefault:\n\t\t\tbreak;\n\t\t};\n") > systracetmp
> +
> + printf("\t\tif (ndx == 0 || ndx == 1)\n") > systraceret
> + printf("\t\t\tp = \"%s\";\n", returntype) > systraceret
> + printf("\t\tbreak;\n") > systraceret
> + }
> + printf("\t\t*n_args = %d;\n\t\tbreak;\n\t}\n", argc) > systrace
> + printf("\t\tbreak;\n") > systracetmp
> +}
> +
> function putent(type, compatwrap) {
> # output syscall declaration for switch table.
> if (compatwrap == "")
> @@ -708,6 +766,7 @@
> else {
> arg_type = "struct " compatwrap_ funcname "_args";
> }
> + putsystrace(type, compatwrap_)
> proto = "int\t" compatwrap_ funcname "(struct lwp *, const " \
> arg_type " *, register_t *);\n"
> if (sysmap[proto] != 1) {
> @@ -729,7 +788,7 @@
> else
> wfn = compatwrap "(" funcname ")";
> wfn_cast="(sy_call_t *)" wfn
> - printf("%s,\n\t %s },", sycall_flags, wfn_cast) > sysent
> + printf("%s,\n\t %s, NULL, 0, 0 },", sycall_flags, wfn_cast) > sysent
> for (i = 0; i < (33 - length(wfn_cast)) / 8; i++)
> printf("\t") > sysent
> printf("/* %d = %s%s */\n", syscall, compatwrap_, funcalias) > sysent
> @@ -922,9 +981,9 @@
> else
> sys_stub = sys_nosys;
>
> - printf("\t{ 0, 0, 0,\n\t %s },\t\t\t/* %d = %s */\n", \
> + printf("\t{ 0, 0, 0,\n\t %s, NULL, 0, 0 },\t\t\t/* %d = %s */\n", \
> sys_stub, syscall, comment) > sysent
> - printf("\t{ 0, 0, SYCALL_NOSYS,\n\t %s },\t\t/* %d = %s */\n", \
> + printf("\t{ 0, 0, SYCALL_NOSYS,\n\t %s, NULL, 0, 0 },\t\t/* %d = %s */\n", \
> "(sy_call_t *)rumpns_enosys", syscall, comment) > rumpsysent
> printf("\t/* %3d */\t\"#%d (%s)\",\n", syscall, syscall, comment) \
> > sysnamesbottom
> @@ -989,9 +1048,9 @@
> exit 1
> }
> while (syscall < nsysent) {
> - printf("\t{ 0, 0, 0,\n\t %s },\t\t\t/* %d = filler */\n", \
> + printf("\t{ 0, 0, 0,\n\t %s, NULL, 0, 0 },\t\t\t/* %d = filler */\n", \
> sys_nosys, syscall) > sysent
> - printf("\t{ 0, 0, SYCALL_NOSYS,\n\t %s },\t\t/* %d = filler */\n", \
> + printf("\t{ 0, 0, SYCALL_NOSYS,\n\t %s, NULL, 0, 0 },\t\t/* %d = filler */\n", \
> "(sy_call_t *)rumpns_enosys", syscall) > rumpsysent
> printf("\t/* %3d */\t\"# filler\",\n", syscall) \
> > sysnamesbottom
> @@ -1009,6 +1068,9 @@
> printf("#define\t%sMAXSYSCALL\t%d\n", constprefix, maxsyscall) > sysnumhdr
> if (nsysent)
> printf("#define\t%sNSYSENT\t%d\n", constprefix, nsysent) > sysnumhdr
> + printf "\tdefault:\n\t\t*n_args = 0;\n\t\tbreak;\n\t};\n}\n" > systrace
> + printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systracetmp
> + printf "\tdefault:\n\t\tbreak;\n\t};\n\tif (p != NULL)\n\t\tstrlcpy(desc, p, descsz);\n}\n" > systraceret
> } '
>
> cat $sysprotos >> $sysarghdr
> @@ -1026,5 +1088,8 @@
>
> #chmod 444 $sysnames $sysnumhdr $syssw
>
> +cat $systracetmp >> $systrace
> +cat $systraceret >> $systrace
> +
> echo Generated following files:
> -echo $sysarghdr $sysnumhdr $syssw $sysnames $rumpcalls $rumpcallshdr $rumpsysmap
> +echo $sysarghdr $sysnumhdr $syssw $sysnames $systrace $rumpcalls $rumpcallshdr $rumpsysmap
> Index: sys/param.h
> ===================================================================
> RCS file: /cvsroot/src/sys/sys/param.h,v
> retrieving revision 1.465
> diff -u -u -r1.465 param.h
> --- sys/param.h 14 Feb 2015 12:59:02 -0000 1.465
> +++ sys/param.h 6 Mar 2015 20:55:50 -0000
> @@ -63,7 +63,7 @@
> * 2.99.9 (299000900)
> */
>
> -#define __NetBSD_Version__ 799000500 /* NetBSD 7.99.5 */
> +#define __NetBSD_Version__ 799000600 /* NetBSD 7.99.6 */
>
> #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \
> (m) * 1000000) + (p) * 100) <= __NetBSD_Version__)
> Index: sys/syscallvar.h
> ===================================================================
> RCS file: /cvsroot/src/sys/sys/syscallvar.h,v
> retrieving revision 1.9
> diff -u -u -r1.9 syscallvar.h
> --- sys/syscallvar.h 4 Mar 2014 03:24:03 -0000 1.9
> +++ sys/syscallvar.h 6 Mar 2015 20:55:50 -0000
> @@ -36,6 +36,10 @@
> #error nothing of interest to userspace here
> #endif
>
> +#if defined(_KERNEL) && defined(_KERNEL_OPT)
> +#include "opt_dtrace.h"
> +#endif
> +
> #include <sys/systm.h>
> #include <sys/proc.h>
>
> @@ -72,6 +76,16 @@
> (sy->sy_flags & SYCALL_INDIRECT) == 0;
> int error;
>
> +#ifdef KDTRACE_HOOKS
> + /*
> + * If the systrace module has registered its probe
> + * callback and if there is a probe active for the
> + * syscall 'entry', process the probe.
> + */
> + if (__predict_false(systrace_probe_func != NULL &&
> + sy->sy_entry != 0))
> + (*systrace_probe_func)(sy->sy_entry, code, sy, uap, 0);
> +#endif
> if (__predict_true(!do_trace)
> || (error = trace_enter(code, uap, sy->sy_narg)) == 0) {
> rval[0] = 0;
> @@ -85,6 +99,18 @@
> error = sy_call(sy, l, uap, rval);
> }
>
> +#ifdef KDTRACE_HOOKS
> + /*
> + * If the systrace module has registered its probe
> + * callback and if there is a probe active for the
> + * syscall 'return', process the probe.
> + */
> + if (__predict_false(systrace_probe_func != NULL &&
> + sy->sy_return != 0))
> + (*systrace_probe_func)(sy->sy_return, code, sy,
> + uap, error ? -1 : rval[0]);
> +#endif
> +
> if (__predict_false(do_trace)) {
> trace_exit(code, rval, error);
> }
> Index: sys/systm.h
> ===================================================================
> RCS file: /cvsroot/src/sys/sys/systm.h,v
> retrieving revision 1.266
> diff -u -u -r1.266 systm.h
> --- sys/systm.h 3 Aug 2014 12:49:32 -0000 1.266
> +++ sys/systm.h 6 Mar 2015 20:55:50 -0000
> @@ -57,6 +57,7 @@
> struct clockframe;
> struct lwp;
> struct proc;
> +struct sysent;
> struct timeval;
> struct tty;
> struct uio;
> @@ -115,11 +116,26 @@
>
> typedef int sy_call_t(struct lwp *, const void *, register_t *);
>
> +/* Used by the machine dependent syscall() code. */
> +typedef void (*systrace_probe_func_t)(uint32_t, int, const struct sysent *,
> + const void *, register_t);
> +
> +/*
> + * Used by loaded syscalls to convert arguments to a DTrace array
> + * of 64-bit arguments.
> + */
> +typedef void (*systrace_args_func_t)(int, void *, uint64_t *, int *);
> +
> +extern systrace_probe_func_t systrace_probe_func;
> +
> extern struct sysent { /* system call table */
> short sy_narg; /* number of args */
> short sy_argsize; /* total size of arguments */
> int sy_flags; /* flags. see below */
> sy_call_t *sy_call; /* implementing function */
> + systrace_args_func_t sy_systrace_args_func;
> + uint32_t sy_entry; /* DTrace entry ID for systrace. */
> + uint32_t sy_return; /* DTrace return ID for systrace. */
> } sysent[];
> extern int nsysent;
> #if BYTE_ORDER == BIG_ENDIAN
> Index: dev/systrace/systrace.c
> ===================================================================
> RCS file: /cvsroot/src/external/cddl/osnet/dev/systrace/systrace.c,v
> retrieving revision 1.4
> diff -u -u -r1.4 systrace.c
> --- dev/systrace/systrace.c 12 Jan 2014 17:49:30 -0000 1.4
> +++ dev/systrace/systrace.c 6 Mar 2015 21:00:41 -0000
> @@ -169,7 +169,7 @@
> systrace_probe(u_int32_t id, int sysnum, struct sysent *se, void *params)
> {
> int n_args = 0;
> - union systrace_probe_args_un uargs[SYS_MAXSYSARGS];
> + uint64_t uargs[SYS_MAXSYSARGS];
>
> /*
> * Check if this syscall has an argument conversion function
> @@ -191,8 +191,7 @@
> systrace_args(sysnum, params, uargs, &n_args);
>
> /* Process the probe using the converted argments. */
> - dtrace_probe(id, uargs[0].u, uargs[1].u, uargs[2].u, uargs[3].u,
> - uargs[4].u);
> + dtrace_probe(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]);
> }
> #endif
>
> @@ -201,8 +200,12 @@
> {
> int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
>
> - systrace_setargdesc(sysnum, desc->dtargd_ndx, desc->dtargd_native,
> - sizeof(desc->dtargd_native));
> + if (SYSTRACE_ISENTRY((uintptr_t)parg))
> + systrace_entry_setargdesc(sysnum, desc->dtargd_ndx,
> + desc->dtargd_native, sizeof(desc->dtargd_native));
> + else
> + systrace_return_setargdesc(sysnum, desc->dtargd_ndx,
> + desc->dtargd_native, sizeof(desc->dtargd_native));
>
> if (desc->dtargd_native[0] == '\0')
> desc->dtargd_ndx = DTRACE_ARGNONE;
> @@ -371,8 +374,10 @@
> return 0;
>
> case MODULE_CMD_FINI:
> - systrace_unload();
> - return 0;
> + return systrace_unload();
> +
> + case MODULE_CMD_AUTOUNLOAD:
> + return EBUSY;
>
> default:
> return ENOTTY;
> Index: dist/uts/common/dtrace/dtrace.c
> ===================================================================
> RCS file: /cvsroot/src/external/cddl/osnet/dist/uts/common/dtrace/dtrace.c,v
> retrieving revision 1.29
> diff -u -u -r1.29 dtrace.c
> --- dist/uts/common/dtrace/dtrace.c 31 Jan 2015 13:36:29 -0000 1.29
> +++ dist/uts/common/dtrace/dtrace.c 6 Mar 2015 21:00:43 -0000
> @@ -7893,7 +7893,7 @@
> */
> dtrace_id_t
> dtrace_probe_lookup(dtrace_provider_id_t prid, char *mod,
> - char *func, char *name)
> + char *func, const char *name)
> {
> dtrace_probekey_t pkey;
> dtrace_id_t id;
> @@ -7905,7 +7905,7 @@
> pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
> pkey.dtpk_func = func;
> pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
> - pkey.dtpk_name = name;
> + pkey.dtpk_name = __UNCONST(name);
> pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
> pkey.dtpk_id = DTRACE_IDNONE;
>
> Index: dist/uts/common/sys/dtrace.h
> ===================================================================
> RCS file: /cvsroot/src/external/cddl/osnet/dist/uts/common/sys/dtrace.h,v
> retrieving revision 1.10
> diff -u -u -r1.10 dtrace.h
> --- dist/uts/common/sys/dtrace.h 15 Mar 2014 08:00:19 -0000 1.10
> +++ dist/uts/common/sys/dtrace.h 6 Mar 2015 21:00:43 -0000
> @@ -2056,7 +2056,7 @@
> extern int dtrace_condense(dtrace_provider_id_t);
> extern void dtrace_invalidate(dtrace_provider_id_t);
> extern dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t, char *,
> - char *, char *);
> + char *, const char *);
> extern dtrace_id_t dtrace_probe_create(dtrace_provider_id_t, const char *,
> const char *, const char *, int, void *);
> extern void *dtrace_probe_arg(dtrace_provider_id_t, dtrace_id_t);
>
Home |
Main Index |
Thread Index |
Old Index