NetBSD-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
kern/45426: better logging for core dumps and process termination, including full core pathname
>Number: 45426
>Category: kern
>Synopsis: better logging for core dumps and process termination,
>including full core pathname
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: kern-bug-people
>State: open
>Class: change-request
>Submitter-Id: net
>Arrival-Date: Tue Oct 04 20:20:00 +0000 2011
>Originator: Greg A. Woods
>Release: NetBSD-current 2011/10/04
>Organization:
Planix, Inc.; Kelowna, BC; Canada
>Environment:
System: NetBSD
Architecture: all
Machine: all
>Description:
I had a very strong desire for the kernel to log where and when
it left core droppings from aborted processes, and also for it
to be quite a bit more intelligent and intelligible about why a
core dump might have failed.
>How-To-Repeat:
>Fix:
(untested, except in netbsd-5 and prior where it is well tested)
Index: sys/kern/sys_process.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/kern/sys_process.c,v
retrieving revision 1.162
diff -u -r1.162 sys_process.c
--- sys/kern/sys_process.c 6 Sep 2011 11:22:42 -0000 1.162
+++ sys/kern/sys_process.c 4 Oct 2011 18:06:42 -0000
@@ -130,6 +130,7 @@
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <sys/ras.h>
+#include <sys/syslog.h>
#include <sys/kmem.h>
#include <sys/kauth.h>
#include <sys/mount.h>
@@ -563,6 +564,10 @@
break;
case PT_DUMPCORE:
+ {
+ const char *cdwhy;
+ int cdlp = LOG_DEBUG;
+
if ((path = SCARG(uap, addr)) != NULL) {
char *dst;
len = SCARG(uap, data);
@@ -578,11 +583,62 @@
}
path = dst;
path[len] = '\0';
+ } else {
+ error = EINVAL;
+ break;
}
- error = (*coredump_vec)(lt, path);
+ error = (*coredump_vec)(lt, path, NULL);
+ /*
+ * XXX this logging code is much the same as the logging code
+ * in kern_sig.c:sigexit() -- perhaps it should be in coredump()
+ */
+ switch (error) {
+ case 0:
+ cdwhy = "dumped core";
+ cdlp = LOG_DEBUG;
+ break;
+ case EAUTH:
+ cdwhy = "was set-id, core dump not permitted";
+ cdlp = LOG_ERR;
+ break;
+ case EFBIG:
+ cdwhy = "core dump would exceed rlimit";
+ cdlp = LOG_NOTICE;
+ break;
+ case EINVAL:
+ cdwhy = "core dump not permitted over non-regular file";
+ cdlp = LOG_WARNING;
+ break;
+ case ENXIO:
+ cdwhy = "filesystem mount flag prevented core dump";
+ cdlp = LOG_NOTICE;
+ break;
+ case EPERM:
+ cdwhy = "core dump not permitted";
+ cdlp = LOG_WARNING;
+ break;
+ default:
+ /* XXX too bad strerror() isn't in the kernel.... */
+ cdwhy = "core dump failed";
+ cdlp = LOG_NOTICE;
+ break;
+ }
+ log(cdlp, "%s: pid %d [eid %d:%d, rid %d:%d, svid %d:%d]
PT_DUMPCORE: %s [in %s]\n",
+ p->p_comm,
+ p->p_pid,
+ kauth_cred_geteuid(l->l_cred),
+ kauth_cred_getegid(l->l_cred),
+ kauth_cred_getuid(l->l_cred),
+ kauth_cred_getgid(l->l_cred),
+ kauth_cred_getsvuid(l->l_cred),
+ kauth_cred_getsvgid(l->l_cred),
+ cdwhy,
+ path);
+ free(path, M_TEMP);
if (path)
kmem_free(path, len + 1);
break;
+ }
#ifdef PT_STEP
case PT_STEP:
Index: sys/kern/kern_sig.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/kern/kern_sig.c,v
retrieving revision 1.316
diff -u -r1.316 kern_sig.c
--- sys/kern/kern_sig.c 16 Sep 2011 22:07:17 -0000 1.316
+++ sys/kern/kern_sig.c 4 Oct 2011 18:03:41 -0000
@@ -95,6 +95,8 @@
#include <sys/savar.h>
#include <sys/exec.h>
#include <sys/kauth.h>
+#include <sys/mount.h>
+#include <sys/syscallargs.h>
#include <sys/acct.h>
#include <sys/callout.h>
#include <sys/atomic.h>
@@ -170,11 +172,6 @@
int kern_logsigexit = 0;
#endif
-static const char logcoredump[] =
- "pid %d (%s), uid %d: exited on signal %d (core dumped)\n";
-static const char lognocoredump[] =
- "pid %d (%s), uid %d: exited on signal %d (core not dumped, err = %d)\n";
-
static kauth_listener_t signal_listener;
static int
@@ -1901,14 +1898,11 @@
* Don't take default actions on system processes.
*/
if (p->p_pid <= 1) {
-#ifdef DIAGNOSTIC
/*
- * Are you sure you want to ignore SIGSEGV
- * in init? XXX
+ * warn of SIGSEGV in init!
*/
- printf_nolog("Process (pid %d) got sig %d\n",
+ printf("Process (pid %d) got sig %d\n",
p->p_pid, signo);
-#endif
continue;
}
@@ -2120,8 +2114,21 @@
KASSERT(mutex_owned(proc_lock));
- log(LOG_ERR, "pid %d was killed: %s\n", p->p_pid, why);
- uprintf_locked("sorry, pid %d was killed: %s\n", p->p_pid, why);
+ /* XXX could log cwd here too, but maybe not much use... */
+ /* XXX should use lesser priority for "orphaned, traced, process." and
maybe others */
+ /* XXX should use PRIdUID if it is ever provided */
+ log(LOG_ERR, "%s: pid %d [eid %d:%d, rid %d:%d, svid %d:%d] was killed:
%s\n",
+ p->p_comm,
+ p->p_pid,
+ kauth_cred_geteuid(p->p_cred),
+ kauth_cred_getegid(p->p_cred),
+ kauth_cred_getuid(p->p_cred),
+ kauth_cred_getgid(p->p_cred),
+ kauth_cred_getsvuid(p->p_cred),
+ kauth_cred_getsvgid(p->p_cred),
+ why);
+ uprintf_locked("sorry, pid %d, command %s, was killed: %s\n", p->p_pid,
p->p_comm, why);
+
psignal(p, SIGKILL);
}
@@ -2206,20 +2213,107 @@
p->p_sigctx.ps_signo = signo;
if (docore) {
+ /* XXX should probably use PNBUF_GET() for these! */
+ char cdcwdbuf[MAXPATHLEN]; /* XXX too bad we don't
have asprintf() in the kernel */
+ char corename[MAXPATHLEN];
+ char *cdcwdbp;
+ const char *cdwhy;
+ int cdlp = LOG_DEBUG;
+
mutex_exit(p->p_lock);
- if ((error = (*coredump_vec)(l, NULL)) == 0)
+ if ((error = (*coredump_vec)(l, NULL, corename)) == 0)
exitsig |= WCOREFLAG;
+ switch (error) {
+ case 0:
+ cdwhy = "dumped core";
+ cdlp = LOG_DEBUG;
+ break;
+ case EAUTH:
+ cdwhy = "was set-id, core dump not permitted";
+ cdlp = LOG_ERR;
+ break;
+ case EFBIG:
+ cdwhy = "core dump would exceed rlimit";
+ cdlp = LOG_NOTICE;
+ break;
+ case EINVAL:
+ cdwhy = "core dump not permitted over non-regular file";
+ cdlp = LOG_WARNING;
+ break;
+ case ENXIO:
+ cdwhy = "filesystem mount flag prevented core dump";
+ cdlp = LOG_NOTICE;
+ break;
+ case EPERM:
+ cdwhy = "core dump not permitted";
+ cdlp = LOG_WARNING;
+ break;
+ default:
+ /* XXX too bad strerror() isn't in the kernel.... */
+ cdwhy = "core dump failed";
+ cdlp = LOG_NOTICE;
+ break;
+ }
+ cdcwdbp = &cdcwdbuf[MAXPATHLEN];
+ *(--cdcwdbp) = '\0';
+ /*
+ * getcwd_common() is a bit of a hack -- but it's better than
+ * nothing! It comes from vfs_getcwd.c. (put in
getcwd_common(9))
+ *
+ * The first arg is a pointer to the process' cwd vnode.
+ *
+ * The second arg is always a NULL vnode pointer.
+ *
+ * The name is placed in the *end* of the buffer which starts
+ * at the pointer passed in the 4th arg. The pointer passed in
+ * by reference in the 3rd arg has to point at the end of the
+ * buffer and it is is moved back to point at the beginning of
+ * the resulting pathname. The buffer has to be NUL
+ * terminated(?).
+ *
+ * The 5th argument here is "max number of vnodes to traverse".
+ * Since each entry takes up at least 2 bytes in the output
+ * buffer, limit it to N/2 vnodes for an N byte buffer.
+ *
+ * The 6th arg is for flags (always 0 here).
+ *
+ * The 7th arg is a pointer to the lwp structure.
+ */
+ if (getcwd_common(p->p_cwdi->cwdi_cdir,
+ (struct vnode *) NULL, &cdcwdbp,
+ cdcwdbuf, sizeof(cdcwdbuf)/2, 0, l))
+ cdcwdbp = __UNCONST("(error getting process cwd)");
+
if (kern_logsigexit) {
- int uid = l->l_cred ?
- (int)kauth_cred_geteuid(l->l_cred) : -1;
+ kauth_cred_t cred;
- if (error)
- log(LOG_INFO, lognocoredump, p->p_pid,
- p->p_comm, uid, signo, error);
- else
- log(LOG_INFO, logcoredump, p->p_pid,
- p->p_comm, uid, signo);
+ cred = l->l_cred;
+ /* XXX should use PRIdUID if it is ever provided */
+ log(cdlp, "%s: pid %d [eid %d:%d, rid %d:%d, svid
%d:%d] exited on signal %d: %s [in %s%s%s]\n",
+ p->p_comm,
+ p->p_pid,
+ cred ? kauth_cred_geteuid(cred) : -1,
+ cred ? kauth_cred_getegid(cred) : -1,
+ cred ? kauth_cred_getuid(cred) : -1,
+ cred ? kauth_cred_getgid(cred) : -1,
+ cred ? kauth_cred_getsvuid(cred) : -1,
+ cred ? kauth_cred_getsvgid(cred) : -1,
+ signo, /* XXX too bad strsignal() isn't in the
kernel.... */
+ cdwhy,
+ (error != 0 || (corename && *corename && *corename
!= '/')) ? cdcwdbp : "",
+ (error == 0 && *corename != '/') ? "/" : "",
+ (error == 0) ? corename : "");
+ }
+ if (cdlp != LOG_DEBUG) { /* tell the user of really abnormal
events */
+ uprintf("%s: pid %d exited on signal %d: %s [in
%s%s%s]\n",
+ p->p_comm,
+ p->p_pid,
+ signo, /* XXX strsignal() */
+ cdwhy,
+ (error != 0 || (corename && *corename &&
*corename != '/')) ? cdcwdbp : "",
+ (error == 0 && *corename != '/') ? "/" : "",
+ (error == 0) ? corename : "");
}
#ifdef PAX_SEGVGUARD
Index: sys/kern/kern_core.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/kern/kern_core.c,v
retrieving revision 1.20
diff -u -r1.20 kern_core.c
--- sys/kern/kern_core.c 24 Sep 2011 22:53:50 -0000 1.20
+++ sys/kern/kern_core.c 4 Oct 2011 17:53:46 -0000
@@ -60,7 +60,7 @@
off_t io_offset;
};
-static int coredump(struct lwp *, const char *);
+static int coredump(struct lwp *, const char *, char *);
static int coredump_buildname(struct proc *, char *, const char *, size_t);
static int
@@ -90,7 +90,7 @@
* value of shortcorename), unless the process was setuid/setgid.
*/
static int
-coredump(struct lwp *l, const char *pattern)
+coredump(struct lwp *l, const char *pattern, char *corename)
{
struct vnode *vp;
struct proc *p;
@@ -105,6 +105,10 @@
char *name, *lastslash;
name = PNBUF_GET();
+ if (name == NULL) {
+ error = ENOMEM;
+ goto done;
+ }
p = l->l_proc;
vm = p->p_vmspace;
@@ -119,7 +123,7 @@
*/
if (USPACE + ctob(vm->vm_dsize + vm->vm_ssize) >=
p->p_rlimit[RLIMIT_CORE].rlim_cur) {
- error = EFBIG; /* better error code? */
+ error = EFBIG;
mutex_exit(p->p_lock);
mutex_exit(proc_lock);
goto done;
@@ -138,12 +142,12 @@
*/
if (p->p_flag & PK_SUGID) {
if (!security_setidcore_dump) {
- error = EPERM;
+ error = EAUTH;
mutex_exit(p->p_lock);
mutex_exit(proc_lock);
goto done;
}
- pattern = security_setidcore_path;
+ pattern = security_setidcore_path; /* is permitted to be NULL */
}
/* Lock, as p_limit and pl_corename might change. */
@@ -152,6 +156,7 @@
if (pattern == NULL) {
pattern = lim->pl_corename;
}
+ KASSERT(pattern); /* otherwise name is "", what will vn_open() do? */
error = coredump_buildname(p, name, pattern, MAXPATHLEN);
mutex_exit(&lim->pl_lock);
@@ -254,8 +259,11 @@
if (error == 0)
error = error1;
done:
- if (name != NULL)
+ if (name != NULL) {
+ if (corename)
+ strcpy(corename, name);
PNBUF_PUT(name);
+ }
return error;
}
Home |
Main Index |
Thread Index |
Old Index