Subject: second pass at the "store the path of the executable in struct proc" patch
To: None <tech-kern@netbsd.org>
From: Christos Zoulas <christos@zoulas.com>
List: tech-kern
Date: 09/27/2007 12:33:31
- Uses reference counted paths and malloc().
  Should it use kmem_alloc() instead?
- Are the names of the reference-counted path functions ok?

If I don't hear any objections, I'll commit it.

christos

Index: kern/exec_conf.c
===================================================================
RCS file: /cvsroot/src/sys/kern/exec_conf.c,v
retrieving revision 1.93
diff -u -u -r1.93 exec_conf.c
--- kern/exec_conf.c	30 Aug 2006 14:41:06 -0000	1.93
+++ kern/exec_conf.c	26 Sep 2007 21:56:56 -0000
@@ -90,6 +90,11 @@
     void *, char *, vaddr_t *);
 #endif
 
+#define ELF32_AUXSIZE (howmany(ELF_AUX_ENTRIES * sizeof(Aux32Info), \
+    sizeof(Elf32_Addr)) + MAXPATHLEN + ALIGN(1))
+#define ELF64_AUXSIZE (howmany(ELF_AUX_ENTRIES * sizeof(Aux64Info), \
+    sizeof(Elf64_Addr)) + MAXPATHLEN + ALIGN(1))
+
 /*
  * Compatibility with old ELF binaries without NetBSD note.
  * Generic ELF executable kernel support was added in NetBSD 1.1.
@@ -324,7 +329,7 @@
 	  { ELF32NAME2(netbsd32,probe) },
 	  &emul_netbsd32,
 	  EXECSW_PRIO_FIRST,
-	  howmany(ELF_AUX_ENTRIES * sizeof(Aux32Info), sizeof (Elf32_Addr)),
+	  ELF32_AUXSIZE,
 	  netbsd32_elf32_copyargs,
 	  NULL,
 #ifdef COREDUMP
@@ -381,7 +386,7 @@
 	  { ELF32NAME2(netbsd32,probe_noteless) },
 	  &emul_netbsd32,
 	  EXECSW_PRIO_FIRST,
-	  howmany(ELF_AUX_ENTRIES * sizeof(Aux32Info), sizeof (Elf32_Addr)),
+	  ELF32_AUXSIZE,
 	  netbsd32_elf32_copyargs,
 	  NULL,
 	  coredump_elf32,
@@ -396,7 +401,7 @@
 	  { ELF32NAME2(netbsd,probe) },
 	  &emul_netbsd,
 	  EXECSW_PRIO_ANY,
-	  howmany(ELF_AUX_ENTRIES * sizeof(Aux32Info), sizeof (Elf32_Addr)),
+	  ELF32_AUXSIZE,
 	  elf32_copyargs,
 	  NULL,
 #ifdef COREDUMP
@@ -413,7 +418,7 @@
 	  { ELF32NAME2(freebsd,probe) },
 	  &emul_freebsd,
 	  EXECSW_PRIO_ANY,
-	  howmany(ELF_AUX_ENTRIES * sizeof(Aux32Info), sizeof(Elf32_Addr)),
+	  ELF32_AUXSIZE,
 	  elf32_copyargs,
 	  NULL,
 #ifdef COREDUMP
@@ -483,7 +488,7 @@
 	  { ELF32NAME2(svr4,probe) },
 	  &emul_svr4,
 	  EXECSW_PRIO_ANY,
-	  howmany(ELF_AUX_ENTRIES * sizeof(Aux32Info), sizeof (Elf32_Addr)),
+	  ELF32_AUXSIZE,
 	  elf32_copyargs,
 	  NULL,
 #ifdef COREDUMP
@@ -501,7 +506,7 @@
 	  { ELF32NAME2(ibcs2,probe) },
 	  &emul_ibcs2,
 	  EXECSW_PRIO_ANY,
-	  howmany(ELF_AUX_ENTRIES * sizeof(Aux32Info), sizeof (Elf32_Addr)),
+	  ELF32_AUXSIZE,
 	  elf32_copyargs,
 	  NULL,
 #ifdef COREDUMP
@@ -519,7 +524,7 @@
 	  { NULL },
 	  &emul_netbsd,
 	  EXECSW_PRIO_LAST,
-	  howmany(ELF_AUX_ENTRIES * sizeof(Aux32Info), sizeof (Elf32_Addr)),
+	  ELF32_AUXSIZE,
 	  elf32_copyargs,
 	  NULL,
 #ifdef COREDUMP
@@ -539,7 +544,7 @@
 	  { ELF64NAME2(netbsd,probe) },
 	  &emul_netbsd,
 	  EXECSW_PRIO_ANY,
-	  howmany(ELF_AUX_ENTRIES * sizeof(Aux64Info), sizeof (Elf64_Addr)),
+	  ELF64_AUXSIZE,
 	  elf64_copyargs,
 	  NULL,
 #ifdef COREDUMP
@@ -574,7 +579,7 @@
 	  { ELF64NAME2(svr4,probe) },
 	  &emul_svr4,
 	  EXECSW_PRIO_ANY,
-	  howmany(ELF_AUX_ENTRIES * sizeof(Aux64Info), sizeof (Elf64_Addr)),
+	  ELF64_AUXSIZE,
 	  elf64_copyargs,
 	  NULL,
 #ifdef COREDUMP
@@ -592,7 +597,7 @@
 	  { NULL },
 	  &emul_netbsd,
 	  EXECSW_PRIO_ANY,
-	  howmany(ELF_AUX_ENTRIES * sizeof(Aux64Info), sizeof (Elf64_Addr)),
+	  ELF64_AUXSIZE,
 	  elf64_copyargs,
 	  NULL,
 #ifdef COREDUMP
Index: kern/exec_elf32.c
===================================================================
RCS file: /cvsroot/src/sys/kern/exec_elf32.c,v
retrieving revision 1.124
diff -u -u -r1.124 exec_elf32.c
--- kern/exec_elf32.c	24 Jun 2007 20:35:37 -0000	1.124
+++ kern/exec_elf32.c	26 Sep 2007 21:56:56 -0000
@@ -131,7 +131,7 @@
     struct ps_strings *arginfo, char **stackp, void *argp)
 {
 	size_t len;
-	AuxInfo ai[ELF_AUX_ENTRIES], *a;
+	AuxInfo ai[ELF_AUX_ENTRIES], *a, *execname;
 	struct elf_args *ap;
 	int error;
 
@@ -139,6 +139,7 @@
 		return error;
 
 	a = ai;
+	execname = NULL;
 
 	/*
 	 * Push extra arguments on the stack needed by dynamically
@@ -197,6 +198,12 @@
 		a->a_v = kauth_cred_getgid(l->l_cred);
 		a++;
 
+		if (l->l_proc->p_pathref) {
+			execname = a;
+			a->a_type = AT_SUN_EXECNAME;
+			a++;
+		}
+
 		free(ap, M_TEMP);
 		pack->ep_emul_arg = NULL;
 	}
@@ -210,6 +217,15 @@
 		return error;
 	*stackp += len;
 
+	if (execname) {
+		char *path = l->l_proc->p_pathref->path;
+		execname->a_v = (intptr_t)*stackp;
+		len = strlen(path) + 1;
+		if ((error = copyout(path, *stackp, len)) != 0)
+			return error;
+		*stackp += ALIGN(len);
+	}
+
 	return 0;
 }
 
Index: kern/init_main.c
===================================================================
RCS file: /cvsroot/src/sys/kern/init_main.c,v
retrieving revision 1.314
diff -u -u -r1.314 init_main.c
--- kern/init_main.c	7 Sep 2007 18:56:08 -0000	1.314
+++ kern/init_main.c	26 Sep 2007 21:56:57 -0000
@@ -718,6 +718,7 @@
 	 * Now in process 1.
 	 */
 	strncpy(p->p_comm, "init", MAXCOMLEN);
+	p->p_pathref = NULL;
 
 	/*
 	 * Wait for main() to tell us that it's safe to exec.
Index: kern/kern_exec.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_exec.c,v
retrieving revision 1.248
diff -u -u -r1.248 kern_exec.c
--- kern/kern_exec.c	20 Sep 2007 20:51:38 -0000	1.248
+++ kern/kern_exec.c	26 Sep 2007 21:56:57 -0000
@@ -708,6 +708,25 @@
 	arginfo.ps_nargvstr = argc;
 	arginfo.ps_nenvstr = envc;
 
+	/* set command name & other accounting info */
+	i = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
+	(void)memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, i);
+	p->p_comm[i] = '\0';
+
+	if (p->p_pathref != NULL)
+		path_decrref(p->p_pathref);
+
+	dp = PNBUF_GET();
+	if ((error = vnode_to_path(dp, MAXPATHLEN, p->p_textvp, l, p)) != 0) {
+#ifdef DIAGNOSTIC
+		printf("Cannot get path for pid %d [%s] (error %d)",
+		    (int)p->p_pid, p->p_comm, error);
+#endif
+		p->p_pathref = NULL;
+	} else
+		p->p_pathref = path_newref(dp);
+	PNBUF_PUT(dp);
+
 	stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
 		STACK_PTHREADSPACE + sizeof(struct ps_strings) + szsigcode),
 		len - (sizeof(struct ps_strings) + szsigcode));
@@ -769,12 +788,8 @@
 
 	l->l_ctxlink = NULL;	/* reset ucontext link */
 
-	/* set command name & other accounting info */
-	i = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
-	memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, i);
-	p->p_comm[i] = '\0';
-	p->p_acflag &= ~AFORK;
 
+	p->p_acflag &= ~AFORK;
 	p->p_flag |= PK_EXEC;
 
 	/*
Index: kern/kern_exit.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_exit.c,v
retrieving revision 1.187
diff -u -u -r1.187 kern_exit.c
--- kern/kern_exit.c	6 Sep 2007 23:58:56 -0000	1.187
+++ kern/kern_exit.c	26 Sep 2007 21:56:58 -0000
@@ -500,6 +500,11 @@
 	 */
 	p->p_stat = SDEAD;
 
+	/* Free the path */
+	if (p->p_pathref)
+		path_decrref(p->p_pathref);
+	p->p_pathref = NULL;
+
 	/* Put in front of parent's sibling list for parent to collect it */
 	q = p->p_pptr;
 	q->p_nstopchild++;
Index: kern/kern_fork.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_fork.c,v
retrieving revision 1.143
diff -u -u -r1.143 kern_fork.c
--- kern/kern_fork.c	21 Sep 2007 19:19:20 -0000	1.143
+++ kern/kern_fork.c	26 Sep 2007 21:56:58 -0000
@@ -307,6 +307,11 @@
 	p2->p_flag = p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN);
 	p2->p_emul = p1->p_emul;
 	p2->p_execsw = p1->p_execsw;
+	if (p1->p_pathref) {
+		path_incrref(p1->p_pathref);
+		p2->p_pathref = p1->p_pathref;
+	} else
+		p2->p_pathref = NULL;
 
 	if (flags & FORK_SYSTEM) {
 		/*
Index: kern/kern_proc.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_proc.c,v
retrieving revision 1.116
diff -u -u -r1.116 kern_proc.c
--- kern/kern_proc.c	21 Sep 2007 19:19:20 -0000	1.116
+++ kern/kern_proc.c	26 Sep 2007 21:56:59 -0000
@@ -246,6 +246,8 @@
 
 static specificdata_domain_t proc_specificdata_domain;
 
+static kmutex_t refpath_mutex;
+
 /*
  * Initialize global process hashing structures.
  */
@@ -261,6 +263,7 @@
 
 	mutex_init(&proclist_lock, MUTEX_DEFAULT, IPL_NONE);
 	mutex_init(&proclist_mutex, MUTEX_SPIN, IPL_SCHED);
+	mutex_init(&refpath_mutex, MUTEX_DEFAULT, IPL_NONE);
 
 	pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table,
 			    M_PROC, M_WAITOK);
@@ -357,6 +360,7 @@
 	(*p->p_emul->e_syscall_intern)(p);
 #endif
 	strlcpy(p->p_comm, "system", sizeof(p->p_comm));
+	p->p_pathref = NULL;
 
 	l->l_flag = LW_INMEM | LW_SYSTEM;
 	l->l_stat = LSONPROC;
@@ -1514,3 +1518,32 @@
 	specificdata_setspecific(proc_specificdata_domain,
 				 &p->p_specdataref, key, data);
 }
+
+MALLOC_DECLARE(M_REFPATH);
+MALLOC_DEFINE(M_REFPATH, "procpath", "proc path reference count");
+
+void
+path_incrref(struct refpath *r)
+{
+	mutex_enter(&refpath_mutex);
+	r->count++;
+	mutex_exit(&refpath_mutex);
+}
+
+void
+path_decrref(struct refpath *r) {
+	mutex_enter(&refpath_mutex);
+	if (--r->count == 0)
+		free(r, M_REFPATH);
+	mutex_exit(&refpath_mutex);
+}
+
+struct refpath *
+path_newref(const char *str)
+{
+	size_t len = strlen(str) + 1;
+	struct refpath *r = malloc(sizeof(*r) + len, M_REFPATH, M_WAITOK);
+	(void)memcpy(r->path, str, len);
+	r->count = 1;
+	return r;
+}
Index: kern/vfs_getcwd.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_getcwd.c,v
retrieving revision 1.35
diff -u -u -r1.35 vfs_getcwd.c
--- kern/vfs_getcwd.c	9 Feb 2007 21:55:32 -0000	1.35
+++ kern/vfs_getcwd.c	26 Sep 2007 21:56:59 -0000
@@ -558,3 +558,54 @@
 	free(path, M_TEMP);
 	return error;
 }
+
+/*
+ * Try to find a pathname for a vnode. Since there is no mapping
+ * vnode -> parent directory, this needs the NAMECACHE_ENTER_REVERSE
+ * option to work (to make cache_revlookup succeed).
+ */
+int
+vnode_to_path(char *path, size_t len, struct vnode *vp, struct lwp *curl,
+    struct proc *p)
+{
+	struct proc *curp = curl->l_proc;
+	int error, lenused, elen;
+	char *bp, *bend;
+	struct vnode *dvp;
+
+	bp = bend = &path[len];
+	*(--bp) = '\0';
+
+	error = vget(vp, LK_EXCLUSIVE | LK_RETRY);
+	if (error != 0)
+		return error;
+	error = cache_revlookup(vp, &dvp, &bp, path);
+	vput(vp);
+	if (error != 0)
+		return (error == -1 ? ENOENT : error);
+
+	error = vget(dvp, 0);
+	if (error != 0)
+		return error;
+	*(--bp) = '/';
+	/* XXX GETCWD_CHECK_ACCESS == 0x0001 */
+	error = getcwd_common(dvp, NULL, &bp, path, len / 2, 1, curl);
+
+	/*
+	 * Strip off emulation path for emulated processes looking at
+	 * the maps file of a process of the same emulation. (Won't
+	 * work if /emul/xxx is a symlink..)
+	 */
+	if (curp->p_emul == p->p_emul && curp->p_emul->e_path != NULL) {
+		elen = strlen(curp->p_emul->e_path);
+		if (!strncmp(bp, curp->p_emul->e_path, elen))
+			bp = &bp[elen];
+	}
+
+	lenused = bend - bp;
+
+	memcpy(path, bp, lenused);
+	path[lenused] = 0;
+
+	return 0;
+}
Index: sys/exec_elf.h
===================================================================
RCS file: /cvsroot/src/sys/sys/exec_elf.h,v
retrieving revision 1.91
diff -u -u -r1.91 exec_elf.h
--- sys/exec_elf.h	19 Aug 2007 03:38:52 -0000	1.91
+++ sys/exec_elf.h	26 Sep 2007 21:57:00 -0000
@@ -806,7 +806,7 @@
 
 #ifdef _KERNEL
 
-#define ELF_AUX_ENTRIES	12		/* Size of aux array passed to loader */
+#define ELF_AUX_ENTRIES	14	/* Max size of aux array passed to loader */
 #define ELF32_NO_ADDR	(~(Elf32_Addr)0) /* Indicates addr. not yet filled in */
 #define ELF32_LINK_ADDR	((Elf32_Addr)-2) /* advises to use link address */
 #define ELF64_NO_ADDR	(~(Elf64_Addr)0) /* Indicates addr. not yet filled in */
Index: sys/filedesc.h
===================================================================
RCS file: /cvsroot/src/sys/sys/filedesc.h,v
retrieving revision 1.40
diff -u -u -r1.40 filedesc.h
--- sys/filedesc.h	9 Jul 2007 21:11:32 -0000	1.40
+++ sys/filedesc.h	26 Sep 2007 21:57:00 -0000
@@ -156,6 +156,8 @@
 #define GETCWD_CHECK_ACCESS 0x0001
 int	getcwd_common(struct vnode *, struct vnode *, char **, char *, int,
     int, struct lwp *);
+int	vnode_to_path(char *, size_t, struct vnode *, struct lwp *,
+    struct proc *);
 
 int	closef(struct file *, struct lwp *);
 int	getsock(struct filedesc *, int, struct file **);
Index: sys/proc.h
===================================================================
RCS file: /cvsroot/src/sys/sys/proc.h,v
retrieving revision 1.254
diff -u -u -r1.254 proc.h
--- sys/proc.h	7 Sep 2007 18:56:12 -0000	1.254
+++ sys/proc.h	26 Sep 2007 21:57:00 -0000
@@ -189,6 +189,11 @@
  */
 #define	EMUL_HAS_SYS___syscall	0x001	/* Has SYS___syscall */
 
+struct refpath {
+	size_t	count;
+	char	path[1];
+};
+
 /*
  * Description of a process.
  *
@@ -249,6 +254,7 @@
 	char		p_pad1[3];
 
 	pid_t		p_pid;		/* (: Process identifier. */
+	struct refpath	*p_pathref;	/* (: absolute path of executable */
 	LIST_ENTRY(proc) p_pglist;	/* l: List of processes in pgrp. */
 	struct proc 	*p_pptr;	/* l: Pointer to parent process. */
 	LIST_ENTRY(proc) p_sibling;	/* l: List of sibling processes. */
@@ -566,6 +572,10 @@
     int (*)(struct proc *, void *arg), void *);
 static __inline struct proc *_proclist_skipmarker(struct proc *);
 
+void	path_incrref(struct refpath *);
+void	path_decrref(struct refpath *);
+struct refpath *path_newref(const char *);
+
 static __inline struct proc *
 _proclist_skipmarker(struct proc *p0)
 {
Index: miscfs/procfs/procfs_map.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/procfs/procfs_map.c,v
retrieving revision 1.32
diff -u -u -r1.32 procfs_map.c
--- miscfs/procfs/procfs_map.c	21 Jul 2007 22:47:36 -0000	1.32
+++ miscfs/procfs/procfs_map.c	26 Sep 2007 21:57:00 -0000
@@ -93,9 +93,6 @@
 
 #define BUFFERSIZE (64 * 1024)
 
-static int procfs_vnode_to_path(struct vnode *vp, char *path, int len,
-				struct lwp *curl, struct proc *p);
-
 /*
  * The map entries can *almost* be read with programs like cat.  However,
  * large maps need special programs to read.  It is not easy to implement
@@ -167,8 +164,8 @@
 				if (error == 0 && vp != pfs->pfs_vnode) {
 					fileid = va.va_fileid;
 					dev = va.va_fsid;
-					error = procfs_vnode_to_path(vp, path,
-					    MAXPATHLEN * 4, curl, p);
+					error = vnode_to_path(path,
+					    MAXPATHLEN * 4, vp, curl, p);
 				}
 			}
 			pos += snprintf(buffer + pos, BUFFERSIZE - pos,
@@ -219,53 +216,3 @@
 {
 	return ((l->l_flag & LW_SYSTEM) == 0);
 }
-
-/*
- * Try to find a pathname for a vnode. Since there is no mapping
- * vnode -> parent directory, this needs the NAMECACHE_ENTER_REVERSE
- * option to work (to make cache_revlookup succeed).
- */
-static int procfs_vnode_to_path(struct vnode *vp, char *path, int len,
-				struct lwp *curl, struct proc *p)
-{
-	struct proc *curp = curl->l_proc;
-	int error, lenused, elen;
-	char *bp, *bend;
-	struct vnode *dvp;
-
-	bp = bend = &path[len];
-	*(--bp) = '\0';
-
-	error = vget(vp, LK_EXCLUSIVE | LK_RETRY);
-	if (error != 0)
-		return error;
-	error = cache_revlookup(vp, &dvp, &bp, path);
-	vput(vp);
-	if (error != 0)
-		return (error == -1 ? ENOENT : error);
-
-	error = vget(dvp, 0);
-	if (error != 0)
-		return error;
-	*(--bp) = '/';
-	/* XXX GETCWD_CHECK_ACCESS == 0x0001 */
-	error = getcwd_common(dvp, NULL, &bp, path, len / 2, 1, curl);
-
-	/*
-	 * Strip off emulation path for emulated processes looking at
-	 * the maps file of a process of the same emulation. (Won't
-	 * work if /emul/xxx is a symlink..)
-	 */
-	if (curp->p_emul == p->p_emul && curp->p_emul->e_path != NULL) {
-		elen = strlen(curp->p_emul->e_path);
-		if (!strncmp(bp, curp->p_emul->e_path, elen))
-			bp = &bp[elen];
-	}
-
-	lenused = bend - bp;
-
-	memcpy(path, bp, lenused);
-	path[lenused] = 0;
-
-	return 0;
-}
Index: miscfs/procfs/procfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/procfs/procfs_vnops.c,v
retrieving revision 1.158
diff -u -u -r1.158 procfs_vnops.c
--- miscfs/procfs/procfs_vnops.c	22 Jul 2007 13:37:13 -0000	1.158
+++ miscfs/procfs/procfs_vnops.c	26 Sep 2007 21:57:01 -0000
@@ -146,7 +146,7 @@
 static int procfs_validfile_linux(struct lwp *, struct mount *);
 static int procfs_root_readdir_callback(struct proc *, void *);
 static struct vnode *procfs_dir(pfstype, struct lwp *, struct proc *,
-				char **, char *, int);
+				char **, char *, size_t);
 
 /*
  * This is a list of the valid names in the
@@ -561,7 +561,7 @@
  */
 static struct vnode *
 procfs_dir(pfstype t, struct lwp *caller, struct proc *target,
-	   char **bpp, char *path, int len)
+	   char **bpp, char *path, size_t len)
 {
 	struct vnode *vp, *rvp = caller->l_proc->p_cwdi->cwdi_rdir;
 	char *bp;
@@ -579,6 +579,12 @@
 		break;
 	case PFSexe:
 		vp = target->p_textvp;
+		if (target->p_pathref && rvp == target->p_cwdi->cwdi_rdir) {
+			(void)strlcpy(path, target->p_pathref->path, len);
+			if (bpp)
+				*bpp = path;
+			return vp;
+		}
 		break;
 	default:
 		return (NULL);