NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

kern/45426: better logging for core dumps and process termination, including full core pathname



>Number:         45426
>Category:       kern
>Synopsis:       better logging for core dumps and process termination, 
>including full core pathname
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Tue Oct 04 20:20:00 +0000 2011
>Originator:     Greg A. Woods
>Release:        NetBSD-current 2011/10/04
>Organization:
Planix, Inc.; Kelowna, BC; Canada
>Environment:
System: NetBSD
Architecture: all
Machine: all
>Description:

        I had a very strong desire for the kernel to log where and when
        it left core droppings from aborted processes, and also for it
        to be quite a bit more intelligent and intelligible about why a
        core dump might have failed.

>How-To-Repeat:

>Fix:

        (untested, except in netbsd-5 and prior where it is well tested)

Index: sys/kern/sys_process.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/kern/sys_process.c,v
retrieving revision 1.162
diff -u -r1.162 sys_process.c
--- sys/kern/sys_process.c      6 Sep 2011 11:22:42 -0000       1.162
+++ sys/kern/sys_process.c      4 Oct 2011 18:06:42 -0000
@@ -130,6 +130,7 @@
 #include <sys/ptrace.h>
 #include <sys/uio.h>
 #include <sys/ras.h>
+#include <sys/syslog.h>
 #include <sys/kmem.h>
 #include <sys/kauth.h>
 #include <sys/mount.h>
@@ -563,6 +564,10 @@
                break;
 
        case  PT_DUMPCORE:
+       {
+               const char     *cdwhy;
+               int             cdlp = LOG_DEBUG;
+
                if ((path = SCARG(uap, addr)) != NULL) {
                        char *dst;
                        len = SCARG(uap, data);
@@ -578,11 +583,62 @@
                        }
                        path = dst;
                        path[len] = '\0';
+               } else {
+                       error = EINVAL;
+                       break;
                }
-               error = (*coredump_vec)(lt, path);
+               error = (*coredump_vec)(lt, path, NULL);
+               /*
+                * XXX this logging code is much the same as the logging code
+                * in kern_sig.c:sigexit() -- perhaps it should be in coredump()
+                */
+               switch (error) {
+               case 0:
+                       cdwhy = "dumped core";
+                       cdlp = LOG_DEBUG;
+                       break;
+               case EAUTH:
+                       cdwhy = "was set-id, core dump not permitted";
+                       cdlp = LOG_ERR;
+                       break;
+               case EFBIG:
+                       cdwhy = "core dump would exceed rlimit";
+                       cdlp = LOG_NOTICE;
+                       break;
+               case EINVAL:
+                       cdwhy = "core dump not permitted over non-regular file";
+                       cdlp = LOG_WARNING;
+                       break;
+               case ENXIO:
+                       cdwhy = "filesystem mount flag prevented core dump";
+                       cdlp = LOG_NOTICE;
+                       break;
+               case EPERM:
+                       cdwhy = "core dump not permitted";
+                       cdlp = LOG_WARNING;
+                       break;
+               default:
+                       /* XXX too bad strerror() isn't in the kernel.... */
+                       cdwhy = "core dump failed";
+                       cdlp = LOG_NOTICE;
+                       break;
+               }
+               log(cdlp, "%s: pid %d [eid %d:%d, rid %d:%d, svid %d:%d] 
PT_DUMPCORE: %s [in %s]\n",
+                   p->p_comm,
+                   p->p_pid,
+                   kauth_cred_geteuid(l->l_cred),
+                   kauth_cred_getegid(l->l_cred),
+                   kauth_cred_getuid(l->l_cred),
+                   kauth_cred_getgid(l->l_cred),
+                   kauth_cred_getsvuid(l->l_cred),
+                   kauth_cred_getsvgid(l->l_cred),
+                   cdwhy,
+                   path);
+               free(path, M_TEMP);
                if (path)
                        kmem_free(path, len + 1);
                break;
+       }
 
 #ifdef PT_STEP
        case  PT_STEP:
Index: sys/kern/kern_sig.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/kern/kern_sig.c,v
retrieving revision 1.316
diff -u -r1.316 kern_sig.c
--- sys/kern/kern_sig.c 16 Sep 2011 22:07:17 -0000      1.316
+++ sys/kern/kern_sig.c 4 Oct 2011 18:03:41 -0000
@@ -95,6 +95,8 @@
 #include <sys/savar.h>
 #include <sys/exec.h>
 #include <sys/kauth.h>
+#include <sys/mount.h>
+#include <sys/syscallargs.h>
 #include <sys/acct.h>
 #include <sys/callout.h>
 #include <sys/atomic.h>
@@ -170,11 +172,6 @@
 int    kern_logsigexit = 0;
 #endif
 
-static const char logcoredump[] =
-    "pid %d (%s), uid %d: exited on signal %d (core dumped)\n";
-static const char lognocoredump[] =
-    "pid %d (%s), uid %d: exited on signal %d (core not dumped, err = %d)\n";
-
 static kauth_listener_t signal_listener;
 
 static int
@@ -1901,14 +1898,11 @@
                         * Don't take default actions on system processes.
                         */
                        if (p->p_pid <= 1) {
-#ifdef DIAGNOSTIC
                                /*
-                                * Are you sure you want to ignore SIGSEGV
-                                * in init? XXX
+                                * warn of SIGSEGV in init!
                                 */
-                               printf_nolog("Process (pid %d) got sig %d\n",
+                               printf("Process (pid %d) got sig %d\n",
                                    p->p_pid, signo);
-#endif
                                continue;
                        }
 
@@ -2120,8 +2114,21 @@
 
        KASSERT(mutex_owned(proc_lock));
 
-       log(LOG_ERR, "pid %d was killed: %s\n", p->p_pid, why);
-       uprintf_locked("sorry, pid %d was killed: %s\n", p->p_pid, why);
+       /* XXX could log cwd here too, but maybe not much use... */
+       /* XXX should use lesser priority for "orphaned, traced, process." and 
maybe others */
+       /* XXX should use PRIdUID if it is ever provided */
+       log(LOG_ERR, "%s: pid %d [eid %d:%d, rid %d:%d, svid %d:%d] was killed: 
%s\n",
+           p->p_comm,
+           p->p_pid,
+           kauth_cred_geteuid(p->p_cred),
+           kauth_cred_getegid(p->p_cred),
+           kauth_cred_getuid(p->p_cred),
+           kauth_cred_getgid(p->p_cred),
+           kauth_cred_getsvuid(p->p_cred),
+           kauth_cred_getsvgid(p->p_cred),
+           why);
+       uprintf_locked("sorry, pid %d, command %s, was killed: %s\n", p->p_pid, 
p->p_comm, why);
+
        psignal(p, SIGKILL);
 }
 
@@ -2206,20 +2213,107 @@
        p->p_sigctx.ps_signo = signo;
 
        if (docore) {
+               /* XXX should probably use PNBUF_GET() for these! */
+               char            cdcwdbuf[MAXPATHLEN]; /* XXX too bad we don't 
have asprintf() in the kernel */
+               char            corename[MAXPATHLEN];
+               char           *cdcwdbp;
+               const char     *cdwhy;
+               int             cdlp = LOG_DEBUG;
+
                mutex_exit(p->p_lock);
-               if ((error = (*coredump_vec)(l, NULL)) == 0)
+               if ((error = (*coredump_vec)(l, NULL, corename)) == 0)
                        exitsig |= WCOREFLAG;
 
+               switch (error) {
+               case 0:
+                       cdwhy = "dumped core";
+                       cdlp = LOG_DEBUG;
+                       break;
+               case EAUTH:
+                       cdwhy = "was set-id, core dump not permitted";
+                       cdlp = LOG_ERR;
+                       break;
+               case EFBIG:
+                       cdwhy = "core dump would exceed rlimit";
+                       cdlp = LOG_NOTICE;
+                       break;
+               case EINVAL:
+                       cdwhy = "core dump not permitted over non-regular file";
+                       cdlp = LOG_WARNING;
+                       break;
+               case ENXIO:
+                       cdwhy = "filesystem mount flag prevented core dump";
+                       cdlp = LOG_NOTICE;
+                       break;
+               case EPERM:
+                       cdwhy = "core dump not permitted";
+                       cdlp = LOG_WARNING;
+                       break;
+               default:
+                       /* XXX too bad strerror() isn't in the kernel.... */
+                       cdwhy = "core dump failed";
+                       cdlp = LOG_NOTICE;
+                       break;
+               }
+               cdcwdbp = &cdcwdbuf[MAXPATHLEN];
+               *(--cdcwdbp) = '\0';
+               /*
+                * getcwd_common() is a bit of a hack -- but it's better than
+                * nothing!  It comes from vfs_getcwd.c.  (put in 
getcwd_common(9))
+                *
+                * The first arg is a pointer to the process' cwd vnode.
+                *
+                * The second arg is always a NULL vnode pointer.
+                *
+                * The name is placed in the *end* of the buffer which starts
+                * at the pointer passed in the 4th arg.  The pointer passed in
+                * by reference in the 3rd arg has to point at the end of the
+                * buffer and it is is moved back to point at the beginning of
+                * the resulting pathname.  The buffer has to be NUL
+                * terminated(?).
+                *
+                * The 5th argument here is "max number of vnodes to traverse".
+                * Since each entry takes up at least 2 bytes in the output
+                * buffer, limit it to N/2 vnodes for an N byte buffer.
+                * 
+                * The 6th arg is for flags (always 0 here).
+                *
+                * The 7th arg is a pointer to the lwp structure.
+                */
+               if (getcwd_common(p->p_cwdi->cwdi_cdir,
+                                 (struct vnode *) NULL, &cdcwdbp,
+                                 cdcwdbuf, sizeof(cdcwdbuf)/2, 0, l))
+                       cdcwdbp = __UNCONST("(error getting process cwd)");
+
                if (kern_logsigexit) {
-                       int uid = l->l_cred ?
-                           (int)kauth_cred_geteuid(l->l_cred) : -1;
+                       kauth_cred_t    cred;
 
-                       if (error)
-                               log(LOG_INFO, lognocoredump, p->p_pid,
-                                   p->p_comm, uid, signo, error);
-                       else
-                               log(LOG_INFO, logcoredump, p->p_pid,
-                                   p->p_comm, uid, signo);
+                       cred = l->l_cred;
+                       /* XXX should use PRIdUID if it is ever provided */
+                       log(cdlp, "%s: pid %d [eid %d:%d, rid %d:%d, svid 
%d:%d] exited on signal %d: %s [in %s%s%s]\n",
+                           p->p_comm,
+                           p->p_pid,
+                           cred ? kauth_cred_geteuid(cred) : -1,
+                           cred ? kauth_cred_getegid(cred) : -1,
+                           cred ? kauth_cred_getuid(cred) : -1,
+                           cred ? kauth_cred_getgid(cred) : -1,
+                           cred ? kauth_cred_getsvuid(cred) : -1,
+                           cred ? kauth_cred_getsvgid(cred) : -1,
+                           signo, /* XXX too bad strsignal() isn't in the 
kernel.... */
+                           cdwhy,
+                           (error != 0 || (corename && *corename && *corename 
!= '/')) ? cdcwdbp : "",
+                           (error == 0 && *corename != '/') ? "/" : "",
+                           (error == 0) ? corename : "");
+               }
+               if (cdlp != LOG_DEBUG) { /* tell the user of really abnormal 
events */
+                       uprintf("%s: pid %d exited on signal %d: %s [in 
%s%s%s]\n",
+                               p->p_comm,
+                               p->p_pid,
+                               signo, /* XXX strsignal() */
+                               cdwhy,
+                               (error != 0 || (corename && *corename && 
*corename != '/')) ? cdcwdbp : "",
+                               (error == 0 && *corename != '/') ? "/" : "",
+                               (error == 0) ? corename : "");
                }
 
 #ifdef PAX_SEGVGUARD
Index: sys/kern/kern_core.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/kern/kern_core.c,v
retrieving revision 1.20
diff -u -r1.20 kern_core.c
--- sys/kern/kern_core.c        24 Sep 2011 22:53:50 -0000      1.20
+++ sys/kern/kern_core.c        4 Oct 2011 17:53:46 -0000
@@ -60,7 +60,7 @@
        off_t io_offset;
 };
 
-static int     coredump(struct lwp *, const char *);
+static int     coredump(struct lwp *, const char *, char *);
 static int     coredump_buildname(struct proc *, char *, const char *, size_t);
 
 static int
@@ -90,7 +90,7 @@
  * value of shortcorename), unless the process was setuid/setgid.
  */
 static int
-coredump(struct lwp *l, const char *pattern)
+coredump(struct lwp *l, const char *pattern, char *corename)
 {
        struct vnode            *vp;
        struct proc             *p;
@@ -105,6 +105,10 @@
        char                    *name, *lastslash;
 
        name = PNBUF_GET();
+       if (name == NULL) {
+               error = ENOMEM;
+               goto done;
+       }
 
        p = l->l_proc;
        vm = p->p_vmspace;
@@ -119,7 +123,7 @@
         */
        if (USPACE + ctob(vm->vm_dsize + vm->vm_ssize) >=
            p->p_rlimit[RLIMIT_CORE].rlim_cur) {
-               error = EFBIG;          /* better error code? */
+               error = EFBIG;
                mutex_exit(p->p_lock);
                mutex_exit(proc_lock);
                goto done;
@@ -138,12 +142,12 @@
         */
        if (p->p_flag & PK_SUGID) {
                if (!security_setidcore_dump) {
-                       error = EPERM;
+                       error = EAUTH;
                        mutex_exit(p->p_lock);
                        mutex_exit(proc_lock);
                        goto done;
                }
-               pattern = security_setidcore_path;
+               pattern = security_setidcore_path; /* is permitted to be NULL */
        }
 
        /* Lock, as p_limit and pl_corename might change. */
@@ -152,6 +156,7 @@
        if (pattern == NULL) {
                pattern = lim->pl_corename;
        }
+       KASSERT(pattern); /* otherwise name is "", what will vn_open() do? */
        error = coredump_buildname(p, name, pattern, MAXPATHLEN);
        mutex_exit(&lim->pl_lock);
 
@@ -254,8 +259,11 @@
        if (error == 0)
                error = error1;
 done:
-       if (name != NULL)
+       if (name != NULL) {
+               if (corename)
+                       strcpy(corename, name);
                PNBUF_PUT(name);
+       }
        return error;
 }
 



Home | Main Index | Thread Index | Old Index