Subject: Re: typed copyin/copyout (was: Re: Show sysctl activity in ktrace)
To: None <tech-kern@NetBSD.org>
From: der Mouse <mouse@Rodents.Montreal.QC.CA>
List: tech-kern
Date: 09/21/2006 04:59:40
>> If we really want a better ktrace, we should get rid of it
>> completely and instead integrate dtrace from OpenSolaris.
> Certainly something that works the way the SVR4 (and probably
> Solaris) 'truss' program does [1] would be better for general
> monitoring of programs

Some years back, I added a PT_SYSCALL to ptrace().  Mycroft rejected
the patches - probably deservedly so, but it's still a little annoying
that nobody else has really had much opportunity to pick them up and
beat them into shape.

However, I don't consider truss-style monitoring a suitable substitute
for ktrace-style monitoring.  ktrace monitoring handles tracing through
forks *much* better, and it borders on impossible to truss-style trace
a debugger debugging another process - the tracer and the debugger
fight over who gets to be the (single) tracer of the traced process.
ktrace monitoring is also much harder for the traced process to detect;
while I rarely want to trace something that's explicitly trying to
detect whether it's traced, this also makes it less disruptive in the
Heisenbergian sense of the observer disrupting the observed.

> Of course you lose the tracing of internal kernel events - eg namei.

How often does the kernel do a namei that's on behalf of a process but
not directly provoked by a syscall with a pathname argument?  The only
case that comes to mind offhand is taking core dumps.

> [1] process blocks on syscall entry/exit allowing the tracing process
> to determine the syscall arguments and read (write??) the traced
> process's memory.

Yes, writing is important, especially if you want to use the underlying
facilities to implement hackery like hiding certain files or IP
addresses or whatever from the traced process, or otherwise providing a
mutated syscall environment.

> truss is just a very large nested switch statement with lots of
> printf calls - but they are all in userspace.

Yes.  I've got a truss-style tracer that works with my PT_SYSCALL
implementation.  Here's sample output, tracing ls running in the
tracer's source directory.  (The output formatting is closer to SunOS
trace than to Solaris truss or NetBSD ktrace, but the formatting
details are pretty much irrelevant.)

__sysctl (0xeffff480=<6,7>=<HW,PAGESIZE>, 2, 0x10042b98, 0xeffff47c=4, 0x0, 0) = 0: value=4096
mmap (0x0, 32768, 0x3=PROT_READ|PROT_WRITE, 0x1002=MAP_PRIVATE|MAP_ANON, -1, (0,) <0x0,0x0>=0) = 0x10032000
geteuid () = 101
getuid () = 101 (101)
getegid () = 101
getgid () = 101 (101)
open (0x1002fd98="/etc/ld.so.conf", 0x0=O_RDONLY) = -1 ENOENT (No such file or directory)
__stat13 (0x10035040="/local/lib/libtermcap.so.0", 0xeffff5a8) = -1 ENOENT (No such file or directory)
__stat13 (0x10035040="/usr/lib/libtermcap.so.0", 0xeffff5a8) = 0
open (0x10035040="/usr/lib/libtermcap.so.0", 0x0=O_RDONLY) = 3
read (3, 0xeffff570, 52) = 52: "\177ELF\1\2\1\0\0\0\0\0\0\0\0\0\0\3\0\2\0\0\0\1\0\0\v\0\0\0\0004\0\0\"\220\0\0\0\0\0004\0 \0\4\0(\0\30\0\25"
close (3) = 0
open (0x10035040="/usr/lib/libtermcap.so.0", 0x0=O_RDONLY) = 3
__fstat13 (3, 0xeffff678) = 0
read (3, 0xefffe600, 4096) = 4096: "\177ELF\1\2\1\0\0\0\0\0\0\0\0\0\0\3\0\2\0\0\0\1\0\0\v\0\0\0\0004\0\0\"\220\0\0\0\0\0004\0 \0\4\0(\0\30\0\25\0\0\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\35P\0\0\35P\0\0\0\5\0\1\0\0\0\0\0\1\0\0\35P\0\1\35P\0\1\35P\0\0\2\304\0\0\3\37\0\0\0\a\0\1\0\0\0\0\0\2\0\0\37\224\0\1\37\224\0\1\37\224\0\0\0\200\0\0\0\200\0\0\0\6\0\0\0\4\0\0\0\4\0\0\35\34\0\0\35\34\0\0\35\34\0\0\0004\0\0\0004\0\0\0\4\0\0\0\4\0\0\0%\0\0\0?\0\0\0\0\0\0\0\23\0\0\0\24\0\0\0\27\0\0\0\36\0\0\0\0\0\0\0008\0\0\0)\0\0\0:\0\0\0007\0\0\0\0\0\0\0\26\0\0\0\0\0\0\0\34\0\0\0009\0\0\0004\0\0\0006"...
mmap (0x0, 77824, 0x5=PROT_READ|PROT_EXEC, 0x2=MAP_PRIVATE|MAP_FILE, 3, (0,) <0x0,0x0>=0) = 0x10080000
mmap (0x10091000, 8192, 0x7=PROT_READ|PROT_WRITE|PROT_EXEC, 0x12=MAP_PRIVATE|MAP_FIXED|MAP_FILE, 3, (0,) <0x0,0x1000>=4096) = 0x10091000
mmap (0x10093000, 0, 0x7=PROT_READ|PROT_WRITE|PROT_EXEC, 0x1012=MAP_PRIVATE|MAP_FIXED|MAP_ANON, -1, (0,) <0x0,0x0>=0) = 0x10093000
munmap (0x10082000, 61440) = 0
close (3) = 0
__stat13 (0x10035080="/local/lib/libc.so.12", 0xeffff5a8) = -1 ENOENT (No such file or directory)
__stat13 (0x10035080="/usr/lib/libc.so.12", 0xeffff5a8) = 0
open (0x10035080="/usr/lib/libc.so.12", 0x0=O_RDONLY) = 3
read (3, 0xeffff570, 52) = 52: "\177ELF\1\2\1\0\0\0\0\0\0\0\0\0\0\3\0\2\0\0\0\1\0\1h0\0\0\0004\0\t\4\360\0\0\0\0\0004\0 \0\4\0(\0?\0<"
close (3) = 0
open (0x10035080="/usr/lib/libc.so.12", 0x0=O_RDONLY) = 3
__fstat13 (3, 0xeffff678) = 0
read (3, 0xefffe600, 4096) = 4096: "\177ELF\1\2\1\0\0\0\0\0\0\0\0\0\0\3\0\2\0\0\0\1\0\1h0\0\0\0004\0\t\4\360\0\0\0\0\0004\0 \0\4\0(\0?\0<\0\0\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\a\300\274\0\a\300\274\0\0\0\5\0\1\0\0\0\0\0\1\0\a\300\300\0\b\300\300\0\b\300\300\0\0Q,\0\0\374l\0\0\0\a\0\1\0\0\0\0\0\2\0\b\21l\0\t\21l\0\t\21l\0\0\0\200\0\0\0\200\0\0\0\6\0\0\0\4\0\0\0\4\0\a\300\210\0\a\300\210\0\a\300\210\0\0\0004\0\0\0004\0\0\0\4\0\0\0\4\0\0\4\a\0\0\6\306\0\0\0\343\0\0\0026\0\0\2\313\0\0\3f\0\0\4\311\0\0\4L\0\0\4\317\0\0\3\222\0\0\0\0\0\0\0\234\0\0\5\262\0\0\1:\0\0\5W\0\0\1)\0\0\0\0\0\0\1\4\0\0\0\0"...
mmap (0x0, 638976, 0x5=PROT_READ|PROT_EXEC, 0x2=MAP_PRIVATE|MAP_FILE, 3, (0,) <0x0,0x0>=0) = 0x100c0000
mmap (0x1014c000, 24576, 0x7=PROT_READ|PROT_WRITE|PROT_EXEC, 0x12=MAP_PRIVATE|MAP_FIXED|MAP_FILE, 3, (0,) <0x0,0x7c000>=507904) = 0x1014c000
mmap (0x10152000, 40960, 0x7=PROT_READ|PROT_WRITE|PROT_EXEC, 0x1012=MAP_PRIVATE|MAP_FIXED|MAP_ANON, -1, (0,) <0x0,0x0>=0) = 0x10152000
munmap (0x1013d000, 61440) = 0
close (3) = 0
getuid () = 101 (101)
gettimeofday (0xeffff728, 0x0) = 0: time=<Thu Sep 21 09:05:18 2006 +.621804s>
ioctl (1, 0x402c7413=_IOR('t',19,44)=TIOCGETA, 0xeffff700) = 0: result=<i:0x2b02=BRKINT|ICRNL|IXON|IXANY|IMAXBEL,o:0x7=OPOST|ONLCR|OXTABS,c:0x4b00=CS8|CREAD|HUPCL,l:0x200005cb=ECHOKE|ECHOE|ECHO|ECHOCTL|ISIG|ICANON|IEXTEN|PENDIN,cc:<eof=^Z,!eol,!eol2,erase=^?,werase=^W,kill=^X,reprint=^R,intr=^C,quit=^\,susp=^P,!dsusp,start=^Q,stop=^S,lnext=^V,discard=^O,status=^T,min=1,time=0>>
__sysctl (0xeffff1a0=<6,7>=<HW,PAGESIZE>, 2, 0xeffff19c, 0xeffff198=4, 0x0, 0) = 0: value=4096
readlink (0x1013be40="/etc/malloc.conf", 0xeffff228, 63) = -1 ENOENT (No such file or directory)
mmap (0x0, 4096, 0x3=PROT_READ|PROT_WRITE, 0x1002=MAP_PRIVATE|MAP_ANON, -1, (0,) <0x0,0x0>=0) = 0x1003a000
break (0x25d10) = 0
break (0x25d10) = 0
break (0x27000) = 0
break (0x27000) = 0
break (0x28000) = 0
break (0x28000) = 0
break (0x29000) = 0
break (0x29000) = 0
break (0x2a000) = 0
madvise (0x29000, 4096, 60x6=MADV_FREE) = 0
madvise (0x29000, 4096, 60x6=MADV_FREE) = 0
madvise (0x29000, 4096, 60x6=MADV_FREE) = 0
ioctl (1, 0x40087468=_IOR('t',104,8)=TIOCGWINSZ, 0xeffff728) = 0: result=<80x24/0x0>
ioctl (1, 0x40087468=_IOR('t',104,8)=TIOCGWINSZ, 0xeffff7a8) = 0: result=<80x24/0x0>
__lstat13 (0x23e60=".", 0xeffff2b8) = 0
open (0x23e60=".", 0x4=O_RDONLY|O_NONBLOCK) = 3
__fstat13 (3, 0xeffff560) = 0
fcntl (3, 0x2=F_SETFD, 1) = 0
__sysctl (0xeffff3f0=<6,7>=<HW,PAGESIZE>, 2, 0x1015b0e0, 0xeffff3ec=4, 0x0, 0) = 0: value=4096
fstatfs (3, 0xeffff460) = 0: statfs=<type=0,flags=0x20005000=MNT_LOCAL|MNT_ROOTFS|MNT_SYMPERM,bsize=1024,iosize=8192,blocks=16932425,bfree=1216839,bavail=370217,files=4246270,ffree=3765736,fsid=000007030000078b,owner=0,fstype=ffs,mnton=/,mntfrom=/dev/sd0d>
break (0x2a000) = 0
break (0x2b000) = 0
break (0x2b000) = 0
break (0x2c000) = 0
lseek (3, (0,) <0x0,0x0>=0, 0x1=L_INCR) = <0x0,0x0>=0
getdents (3, 0x2a000, 4096) = 3584: .:d:2659399 ..:d:1160064 fragments:d:2670341 .depend:-:2659400 C+chflags.c:-:2659401 C+chmod.c:-:2659402 C+chown.c:-:2659403 C+fork.c:-:2659404 C+itimer.c:-:2659405 C+memory.c:-:2659406 C+rlimit.c:-:2659407 C+sig.c:-:2659408 C+socket.c:-:2659409 C+stat.c:-:2659410 C+statfs.c:-:2659411 C+timeofday.c:-:2659412 C+ugid.c:-:2659413 C+uio.c:-:2659414 C__sysctl.c:-:2659415 Caccess.c:-:2659416 Cadjtime.c:-:2659417 Cchdir.c:-:2659418 Cchroot.c:-:2659419 Cclose.c:-:2659420 Cdup.c:-:2659421 Cdup2.c:-:2659422 Cexecve.c:-:2659423 Cexit.c:-:2659424 Cfchdir.c:-:2659425 Cfcntl.c:-:2659426 Cgetpgrp.c:-:2659427 Cgetdirentries.c:-:2659428 Cgetpid.c:-:2659429 Cgetppid.c:-:2659430 Ckill.c:-:2659431 Clseek.c:-:2659432 C_43_ogetdtablesize.c:-:2659433 C_43_ogethostname.c:-:2659434 C_43_ogetpagesize.c:-:2659435 C_43_olseek.c:-:2659436 Copen.c:-:2659437 Cpipe.c:-:2659438 Cptrace.c:-:2659439 Cread.c:-:2659440 Crename.c:-:2659441 Cselect.c:-:2659442 Csun_execv.c:-:2659443 Csun_getdents.c:-:2659444 Csun_mmap.c:-:2659445 Cpoll.c:-:2659446 Csun_open.c:-:2659447 Cwait4.c:-:2659448 Cwrite.c:-:2659449 Makefile:-:2659450 callinfo.h:-:2659451 calls.list.sunos:-:2659452 callutil-iovec.c:-:2659453 callutil-resource.c:-:2659454 callutil-signal.c:-:2659455 callutil.c:-:2659456 callutil-socket.c:-:2659457 callutil-timeval.c:-:2659458 callutil.h:-:2659459 errors.list:-:2659460 errvec.h:-:2659461 externs.h:-:2659462 globals.c:-:2659463 globals.h:-:2659464 makecallvecs:-:2659465 makeerrvec:-:2659466 makenetbsdcalls:-:2659467 trc.c:-:2659468 unimpl.c:-:2659469 Cbreak.c:-:2659470 Cmkdir.c:-:2659471 Creadlink.c:-:2659472 Cioctl.c:-:2659473 ioctl-list:-:2659474 ioctls.h:-:2659475 make-ioctls:-:2659476 ioctls.c:-:2659477 ioc-termios.c:-:2659478 machdep-sparc.c:-:2659479 machdep.h:-:2659480 ioc-winsize.c:-:2659481 ioc-ttymisc.c:-:2659482 Cgetdents.c:-:2659483 calls.list.netbsd:-:2659484 calls.list:-:2659485 callvecs.c:-:2659486 callinfo-externs.h:-:2659487 errvec.c:-:2659488 C+chflags.o:-:2659489 C+chmod.o:-:2659490 C+chown.o:-:2659491 C+fork.o:-:2659492 C+itimer.o:-:2659493 C+memory.o:-:2659494 C+rlimit.o:-:2659495 C+sig.o:-:2659496 C+socket.o:-:2659497 C+stat.o:-:2659498 C+statfs.o:-:2659499 C+timeofday.o:-:2659500 C+ugid.o:-:2659501 C+uio.o:-:2659502 C_43_ogetdtablesize.o:-:2659503 C_43_ogethostname.o:-:2659504 C_43_ogetpagesize.o:-:2659505 C_43_olseek.o:-:2659506 C__sysctl.o:-:2659507 Caccess.o:-:2659508 Cadjtime.o:-:2659509 Cbreak.o:-:2659510 Cchdir.o:-:2659511 Cchroot.o:-:2659512 Cclose.o:-:2659513 Cdup.o:-:2659514 Cdup2.o:-:2659515 Cexecve.o:-:2659516 Cexit.o:-:2659517 Cfchdir.o:-:2659518 Ckill.o:-:2659519 Cfcntl.o:-:2659520 Cgetdents.o:-:2659521 Cgetdirentries.o:-:2659522 Cgetpgrp.o:-:2659523 Cgetpid.o:-:2659524 Cgetppid.o:-:2659525 Cioctl.o:-:2659526 Clseek.o:-:2659527 Cmkdir.o:-:2659528 Copen.o:-:2659529 Cpipe.o:-:2659530 Cpoll.o:-:2659531 Cptrace.o:-:2659532 Cread.o:-:2659533 Creadlink.o:-:2659534 Crename.o:-:2659535 Cselect.o:-:2659536 Cwait4.o:-:2659537 Cwrite.o:-:2659538 callvecs.o:-:2659539 ioctls.o:-:2659540 unimpl.o:-:2659541 ioc-termios.o:-:2659542 ioc-ttymisc.o:-:2659543 ioc-winsize.o:-:2659544 callutil-iovec.o:-:2659545 callutil-resource.o:-:2659546 callutil-signal.o:-:2659547 callutil-socket.o:-:2659548 callutil-timeval.o:-:2659549 callutil.o:-:2659550 errvec.o:-:2659551 globals.o:-:2659552 machdep-sparc.o:-:2659553 trc.o:-:2659554 trc:-:2659555 z:-:2660404
break (0x2c000) = 0
break (0x2d000) = 0
madvise (0x2b000, 4096, 60x6=MADV_FREE) = 0
break (0x2d000) = 0
break (0x2f000) = 0
madvise (0x2c000, 4096, 60x6=MADV_FREE) = 0
break (0x2f000) = 0
break (0x32000) = 0
madvise (0x2d000, 8192, 60x6=MADV_FREE) = 0
lseek (3, (0,) <0x0,0x0>=0, 0x1=L_INCR) = <0x0,0xe00>=3584
getdents (3, 0x2a000, 4096) = 0:
lseek (3, (0,) <0x0,0x0>=0, 0x0=L_SET) = <0x0,0x0>=0
madvise (0x2a000, 4096, 60x6=MADV_FREE) = 0
close (3) = 0
__fstat13 (1, 0xefffed40) = 0
break (0x32000) = 0
break (0x42000) = 0
ioctl (1, 0x402c7413=_IOR('t',19,44)=TIOCGETA, 0xefffed78) = 0: result=<i:0x2b02=BRKINT|ICRNL|IXON|IXANY|IMAXBEL,o:0x7=OPOST|ONLCR|OXTABS,c:0x4b00=CS8|CREAD|HUPCL,l:0x200005cb=ECHOKE|ECHOE|ECHO|ECHOCTL|ISIG|ICANON|IEXTEN|PENDIN,cc:<eof=^Z,!eol,!eol2,erase=^?,werase=^W,kill=^X,reprint=^R,intr=^C,quit=^\,susp=^P,!dsusp,start=^Q,stop=^S,lnext=^V,discard=^O,status=^T,min=1,time=0>>
write (1, 0x32000=".depend               Caccess.o        Cmkdir.o            callutil-timeval.o\n", 78) = 78
write (1, 0x32000="C+chflags.c           Cadjtime.c       Copen.c             callutil.c\n", 70) = 70
write (1, 0x32000="C+chflags.o           Cadjtime.o       Copen.o             callutil.h\n", 70) = 70
write (1, 0x32000="C+chmod.c             Cbreak.c         Cpipe.c             callutil.o\n", 70) = 70
write (1, 0x32000="C+chmod.o             Cbreak.o         Cpipe.o             callvecs.c\n", 70) = 70
write (1, 0x32000="C+chown.c             Cchdir.c         Cpoll.c             callvecs.o\n", 70) = 70
write (1, 0x32000="C+chown.o             Cchdir.o         Cpoll.o             errors.list\n", 71) = 71
write (1, 0x32000="C+fork.c              Cchroot.c        Cptrace.c           errvec.c\n", 68) = 68
write (1, 0x32000="C+fork.o              Cchroot.o        Cptrace.o           errvec.h\n", 68) = 68
write (1, 0x32000="C+itimer.c            Cclose.c         Cread.c             errvec.o\n", 68) = 68
write (1, 0x32000="C+itimer.o            Cclose.o         Cread.o             externs.h\n", 69) = 69
write (1, 0x32000="C+memory.c            Cdup.c           Creadlink.c         fragments\n", 69) = 69
write (1, 0x32000="C+memory.o            Cdup.o           Creadlink.o         globals.c\n", 69) = 69
write (1, 0x32000="C+rlimit.c            Cdup2.c          Crename.c           globals.h\n", 69) = 69
write (1, 0x32000="C+rlimit.o            Cdup2.o          Crename.o           globals.o\n", 69) = 69
write (1, 0x32000="C+sig.c               Cexecve.c        Cselect.c           ioc-termios.c\n", 73) = 73
write (1, 0x32000="C+sig.o               Cexecve.o        Cselect.o           ioc-termios.o\n", 73) = 73
write (1, 0x32000="C+socket.c            Cexit.c          Csun_execv.c        ioc-ttymisc.c\n", 73) = 73
write (1, 0x32000="C+socket.o            Cexit.o          Csun_getdents.c     ioc-ttymisc.o\n", 73) = 73
write (1, 0x32000="C+stat.c              Cfchdir.c        Csun_mmap.c         ioc-winsize.c\n", 73) = 73
write (1, 0x32000="C+stat.o              Cfchdir.o        Csun_open.c         ioc-winsize.o\n", 73) = 73
write (1, 0x32000="C+statfs.c            Cfcntl.c         Cwait4.c            ioctl-list\n", 70) = 70
write (1, 0x32000="C+statfs.o            Cfcntl.o         Cwait4.o            ioctls.c\n", 68) = 68
write (1, 0x32000="C+timeofday.c         Cgetdents.c      Cwrite.c            ioctls.h\n", 68) = 68
write (1, 0x32000="C+timeofday.o         Cgetdents.o      Cwrite.o            ioctls.o\n", 68) = 68
write (1, 0x32000="C+ugid.c              Cgetdirentries.c Makefile            machdep-sparc.c\n", 75) = 75
write (1, 0x32000="C+ugid.o              Cgetdirentries.o callinfo-externs.h  machdep-sparc.o\n", 75) = 75
write (1, 0x32000="C+uio.c               Cgetpgrp.c       callinfo.h          machdep.h\n", 69) = 69
write (1, 0x32000="C+uio.o               Cgetpgrp.o       calls.list          make-ioctls\n", 71) = 71
write (1, 0x32000="C_43_ogetdtablesize.c Cgetpid.c        calls.list.netbsd   makecallvecs\n", 72) = 72
write (1, 0x32000="C_43_ogetdtablesize.o Cgetpid.o        calls.list.sunos    makeerrvec\n", 70) = 70
write (1, 0x32000="C_43_ogethostname.c   Cgetppid.c       callutil-iovec.c    makenetbsdcalls\n", 75) = 75
write (1, 0x32000="C_43_ogethostname.o   Cgetppid.o       callutil-iovec.o    trc\n", 63) = 63
write (1, 0x32000="C_43_ogetpagesize.c   Cioctl.c         callutil-resource.c trc.c\n", 65) = 65
write (1, 0x32000="C_43_ogetpagesize.o   Cioctl.o         callutil-resource.o trc.o\n", 65) = 65
write (1, 0x32000="C_43_olseek.c         Ckill.c          callutil-signal.c   unimpl.c\n", 68) = 68
write (1, 0x32000="C_43_olseek.o         Ckill.o          callutil-signal.o   unimpl.o\n", 68) = 68
write (1, 0x32000="C__sysctl.c           Clseek.c         callutil-socket.c   z\n", 61) = 61
write (1, 0x32000="C__sysctl.o           Clseek.o         callutil-socket.o\n", 57) = 57
write (1, 0x32000="Caccess.c             Cmkdir.c         callutil-timeval.c\n", 58) = 58
madvise (0x2f000, 12288, 60x6=MADV_FREE) = 0
exit (0) = [exited with status 0]

/~\ The ASCII				der Mouse
\ / Ribbon Campaign
 X  Against HTML	       mouse@rodents.montreal.qc.ca
/ \ Email!	     7D C8 61 52 5D E7 2D 39  4E F1 31 3E E8 B3 27 4B