Subject: Re: new pid allocation code
To: None <tech-kern@netbsd.org>
From: David Laight <david@l8s.co.uk>
List: tech-kern
Date: 03/12/2003 19:57:59
On Tue, Mar 11, 2003 at 02:36:30PM +0000, David Laight wrote:
> Hackers,
> 
> The code below implements a different pid allocation and proc/pgrp
> lookup algorithm.
> 
> The main benefits are:
> - pid and pgrp lookup (by id) doesn't require a search
> - no dependency on MAXUSERS
> - automatically scales well to large numbers of processes
> - small data footprint for small systems
> - ability to enumerate through all the processes without holding a lock
>   for the entire duration, or having very messy locking rules.
>   (the allproc list and p_list fields could be deprecated later).
> - Largely MP clean

The following diff is against 'current' and should produce a working
kernel (any problems are caused by me hacking the diff).

The changes to struct proc and struct pgrp probably ought to invoke a
kernel version change (both structures shrink).

	David

Index: sys/proc.h
===================================================================
RCS file: /cvsroot/src/sys/sys/proc.h,v
retrieving revision 1.160
diff -u -p -r1.160 proc.h
--- sys/proc.h	2003/02/18 08:37:43	1.160
+++ sys/proc.h	2003/03/12 18:34:40
@@ -74,7 +74,6 @@ struct session {
  * One structure allocated per process group.
  */
 struct pgrp {
-	LIST_ENTRY(pgrp) pg_hash;	/* Hash chain */
 	LIST_HEAD(, proc) pg_members;	/* Pointer to pgrp members */
 	struct session	*pg_session;	/* Pointer to session */
 	pid_t		pg_id;		/* Pgrp id */
@@ -170,7 +169,7 @@ struct proc {
 	char		p_pad1[3];
 
 	pid_t		p_pid;		/* Process identifier. */
-	LIST_ENTRY(proc) p_hash;	/* Hash chain. */
+	struct proc	*p_dead;	/* Processes waiting for reaper */
 	LIST_ENTRY(proc) p_pglist;	/* List of processes in pgrp. */
 	struct proc 	*p_pptr;	/* Pointer to parent process. */
 	LIST_ENTRY(proc) p_sibling;	/* List of sibling processes. */
@@ -340,24 +340,18 @@ MALLOC_DECLARE(M_SESSION);
 MALLOC_DECLARE(M_SUBPROC);
 
 /*
- * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t,
- * as it is used to represent "no process group".
+ * We use process IDs <= PID_MAX until there are > 16k processes.
+ * NO_PGID is used to represent "no process group" for a tty.
  */
 #define	PID_MAX		30000
-#define	NO_PID		30001
+#define	NO_PGID		(-(pid_t)1)
 
-/*
- * Process IDs <0,PID_SKIP-1> are not considered for new processes
- * once the prototype wraps around.
- */
-#define PID_SKIP	500
-
 #define	SESS_LEADER(p)	((p)->p_session->s_leader == (p))
 #define	SESSHOLD(s)	((s)->s_count++)
 #define	SESSRELE(s)							\
 do {									\
 	if (--(s)->s_count == 0)					\
-		FREE(s, M_SESSION);					\
+		sessdelete(s);						\
 } while (/* CONSTCOND */ 0)
 
 
@@ -372,14 +366,6 @@ do {									\
 #define	FORK_NOWAIT	0x20		/* Make init the parent of the child */
 #define	FORK_CLEANFILES	0x40		/* Start with a clean descriptor set */
 
-#define	PIDHASH(pid)	(&pidhashtbl[(pid) & pidhash])
-extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
-extern u_long		pidhash;
-
-#define	PGRPHASH(pgid)	(&pgrphashtbl[(pgid) & pgrphash])
-extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
-extern u_long		pgrphash;
-
 /*
  * Allow machine-dependent code to override curproc in <machine/cpu.h> for
  * its own convenience.  Otherwise, we declare it as appropriate.
@@ -403,14 +389,13 @@ extern struct lock	proclist_lock;
 extern struct proclist	allproc;	/* List of all processes */
 extern struct proclist	zombproc;	/* List of zombie processes */
 
-extern struct proclist deadproc;	/* List of dead processes */
+extern struct proc	*deadprocs;	/* List of dead processes */
 extern struct simplelock deadproc_slock;
 
 extern struct proc	*initproc;	/* Process slots for init, pager */
 
 extern const struct proclist_desc proclists[];
 
-extern struct pool	proc_pool;	/* Memory pool for procs */
 extern struct pool	pcred_pool;	/* Memory pool for pcreds */
 extern struct pool	plimit_pool;	/* Memory pool for plimits */
 extern struct pool 	pstats_pool;	/* memory pool for pstats */
@@ -426,6 +411,7 @@ int	enterpgrp(struct proc *p, pid_t pgid
 void	fixjobc(struct proc *p, struct pgrp *pgrp, int entering);
 int	inferior(struct proc *p, struct proc *q);
 int	leavepgrp(struct proc *p);
+void	sessdelete(struct session *);
 void	yield(void);
 struct lwp *chooselwp(void);
 void	pgdelete(struct pgrp *pgrp);
@@ -440,7 +426,10 @@ void	reaper(void *);
 void	exit1(struct lwp *, int);
 void	exit2(struct lwp *);
 int	find_stopped_child(struct proc *, pid_t, int, struct proc **);
+struct proc *proc_alloc(void);
+void	proc0_insert(struct proc *, struct lwp *, struct pgrp *, struct session *);
 void	proc_free(struct proc *);
+void	proc_free_mem(struct proc *);
 void	exit_lwps(struct lwp *l);
 int	fork1(struct lwp *, int, int, void *, size_t,
 	    void (*)(void *), void *, register_t *, struct proc **);
@@ -467,7 +456,7 @@ void	proclist_lock_read(void);
 void	proclist_unlock_read(void);
 int	proclist_lock_write(void);
 void	proclist_unlock_write(int);
-void	p_sugid(struct proc*);
+void	p_sugid(struct proc *);
 
 /* Compatibility with old, non-interlocked tsleep call */
 #define	tsleep(chan, pri, wmesg, timo)					\
Index: kern/init_main.c
===================================================================
RCS file: /cvsroot/src/sys/kern/init_main.c,v
retrieving revision 1.217
diff -u -p -r1.217 init_main.c
--- kern/init_main.c	2003/01/20 20:02:56	1.217
+++ kern/init_main.c	2003/03/12 18:34:49
@@ -261,25 +261,7 @@ main(void)
 	 * Create process 0 (the swapper).
 	 */
 	p = &proc0;
-	LIST_INIT(&p->p_lwps);
-	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
-	p->p_nlwps = 1;
-
-	s = proclist_lock_write();
-	LIST_INSERT_HEAD(&allproc, p, p_list);
-	LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
-	LIST_INSERT_HEAD(&alllwp, l, l_list);
-	proclist_unlock_write(s);
-
-	p->p_pgrp = &pgrp0;
-	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
-	LIST_INIT(&pgrp0.pg_members);
-	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
-
-	pgrp0.pg_session = &session0;
-	session0.s_count = 1;
-	session0.s_sid = p->p_pid;
-	session0.s_leader = p;
+	proc0_insert(p, l, &pgrp0, &session0);
 
 	/*
 	 * Set P_NOCLDWAIT so that kernel threads are reparented to
Index: kern/kern_exit.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_exit.c,v
retrieving revision 1.114
diff -u -p -r1.114 kern_exit.c
--- kern/kern_exit.c	2003/03/12 15:26:33	1.114
+++ kern/kern_exit.c	2003/03/12 18:34:57
@@ -269,39 +269,6 @@ exit1(struct lwp *l, int rv)
 		(*p->p_emul->e_proc_exit)(p);
 
 	/*
-	 * Save exit status and final rusage info, adding in child rusage
-	 * info and self times.
-	 * In order to pick up the time for the current execution, we must
-	 * do this before unlinking the lwp from l_list.
-	 */
-	p->p_xstat = rv;
-	*p->p_ru = p->p_stats->p_ru;
-	calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL);
-	ruadd(p->p_ru, &p->p_stats->p_cru);
-
-	/*
-	 * NOTE: WE ARE NO LONGER ALLOWED TO SLEEP!
-	 */
-	p->p_stat = SDEAD;
-	p->p_nrlwps--;
-	l->l_stat = SDEAD;
-
-	/*
-	 * Remove proc from pidhash chain so looking it up won't
-	 * work.  Move it from allproc to zombproc, but do not yet
-	 * wake up the reaper.  We will put the proc on the
-	 * deadproc list later (using the p_hash member), and
-	 * wake up the reaper when we do.
-	 */
-	s = proclist_lock_write();
-	LIST_REMOVE(p, p_hash);
-	LIST_REMOVE(p, p_list);
-	LIST_INSERT_HEAD(&zombproc, p, p_list);
-	LIST_REMOVE(l, l_list);
-	l->l_flag |= L_DETACHED;
-	proclist_unlock_write(s);
-
-	/*
 	 * Give orphaned children to init(8).
 	 */
 	q = LIST_FIRST(&p->p_children);
@@ -353,6 +320,38 @@ exit1(struct lwp *l, int rv)
 	}
 
 	/*
+	 * Save exit status and final rusage info, adding in child rusage
+	 * info and self times.
+	 * In order to pick up the time for the current execution, we must
+	 * do this before unlinking the lwp from l_list.
+	 */
+	p->p_xstat = rv;
+	*p->p_ru = p->p_stats->p_ru;
+	calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL);
+	ruadd(p->p_ru, &p->p_stats->p_cru);
+
+	/*
+	 * NOTE: WE ARE NO LONGER ALLOWED TO SLEEP!
+	 */
+
+	/*
+	 * Move proc from allproc to zombproc, but do not yet
+	 * wake up the reaper.  We will put the proc on the
+	 * deadproc list later (using the p_dead member), and
+	 * wake up the reaper when we do.
+	 * Changing the state to SDEAD stops it being found by pfind().
+	 */
+	s = proclist_lock_write();
+	p->p_stat = SDEAD;
+	p->p_nrlwps--;
+	l->l_stat = SDEAD;
+	LIST_REMOVE(p, p_list);
+	LIST_INSERT_HEAD(&zombproc, p, p_list);
+	LIST_REMOVE(l, l_list);
+	l->l_flag |= L_DETACHED;
+	proclist_unlock_write(s);
+
+	/*
 	 * Notify interested parties of our demise.
 	 */
 	KNOTE(&p->p_klist, NOTE_EXIT);
@@ -505,7 +504,7 @@ lwp_exit_hook(struct lwp *l, void *arg)
  * we should refrain from changing any interrupt state.
  *
  * We lock the deadproc list (a spin lock), place the proc on that
- * list (using the p_hash member), and wake up the reaper.
+ * list (using the p_dead member), and wake up the reaper.
  */
 void
 exit2(struct lwp *l)
@@ -513,7 +512,8 @@ exit2(struct lwp *l)
 	struct proc *p = l->l_proc;
 
 	simple_lock(&deadproc_slock);
-	LIST_INSERT_HEAD(&deadproc, p, p_hash);
+	p->p_dead = deadprocs;
+	deadprocs = p;
 	simple_unlock(&deadproc_slock);
 
 	/* lwp_exit2() will wake up deadproc for us. */
@@ -535,11 +535,11 @@ reaper(void *arg)
 
 	for (;;) {
 		simple_lock(&deadproc_slock);
-		p = LIST_FIRST(&deadproc);
+		p = deadprocs;
 		l = LIST_FIRST(&deadlwp);
 		if (p == NULL && l == NULL) {
 			/* No work for us; go to sleep until someone exits. */
-			(void) ltsleep(&deadproc, PVM|PNORELOCK,
+			(void) ltsleep(&deadprocs, PVM|PNORELOCK,
 			    "reaper", 0, &deadproc_slock);
 			continue;
 		}
@@ -547,7 +547,7 @@ reaper(void *arg)
 		if (l != NULL ) {
 			p = l->l_proc;
 
-			/* Remove us from the deadlwp list. */
+			/* Remove lwp from the deadlwp list. */
 			LIST_REMOVE(l, l_list);
 			simple_unlock(&deadproc_slock);
 			KERNEL_PROC_LOCK(curlwp);
@@ -580,8 +580,8 @@ reaper(void *arg)
 			 * the wakeup() above? */
 			KERNEL_PROC_UNLOCK(curlwp);
 		} else {
-			/* Remove us from the deadproc list. */
-			LIST_REMOVE(p, p_hash);
+			/* Remove proc from the deadproc list. */
+			deadprocs = p->p_dead;
 			simple_unlock(&deadproc_slock);
 			KERNEL_PROC_LOCK(curlwp);
 
@@ -754,6 +754,16 @@ proc_free(struct proc *p)
 
 	scheduler_wait_hook(parent, p);
 	p->p_xstat = 0;
+
+	/*
+	 * At this point we are going to start freeing the final resources.
+	 * If anyone tries to access the proc structure after here they
+	 * will get a shock - bits are missing.
+	 * Attempt to make it hard!
+	 */
+
+	p->p_stat = SIDL;		/* not even a zombie any more */
+
 	ruadd(&parent->p_stats->p_cru, p->p_ru);
 	pool_put(&rusage_pool, p->p_ru);
 
@@ -765,9 +775,8 @@ proc_free(struct proc *p)
 
 	s = proclist_lock_write();
 	LIST_REMOVE(p, p_list);	/* off zombproc */
-	proclist_unlock_write(s);
-
 	LIST_REMOVE(p, p_sibling);
+	proclist_unlock_write(s);
 
 	/*
 	 * Decrement the count of procs running with this uid.
@@ -796,9 +805,8 @@ proc_free(struct proc *p)
 		pool_put(&sadata_pool, p->p_sa);
 	}
 
-	pool_put(&proc_pool, p);
-	nprocs--;
-	return;
+	/* Free proc structure and let pid be reallocated */
+	proc_free_mem(p);
 }
 
 /*
Index: kern/kern_fork.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_fork.c,v
retrieving revision 1.106
diff -u -p -r1.106 kern_fork.c
--- kern/kern_fork.c	2003/01/24 01:42:53	1.106
+++ kern/kern_fork.c	2003/03/12 18:34:59
@@ -204,13 +204,12 @@ fork1(struct lwp *l1, int flags, int exi
     void (*func)(void *), void *arg, register_t *retval,
     struct proc **rnewprocp)
 {
-	struct proc	*p1, *p2, *tp;
+	struct proc	*p1, *p2;
 	uid_t		uid;
 	struct lwp	*l2;
 	int		count, s;
 	vaddr_t		uaddr;
 	boolean_t	inmem;
-	static int	nextpid, pidchecked;
 
 	/*
 	 * Although process entries are dynamically created, we still keep
@@ -267,7 +266,7 @@ fork1(struct lwp *l1, int flags, int exi
 	 */
 
 	/* Allocate new proc. */
-	p2 = pool_get(&proc_pool, PR_WAITOK);
+	p2 = proc_alloc();
 
 	/*
 	 * Make a proc table entry for the new process.
@@ -335,15 +334,21 @@ fork1(struct lwp *l1, int flags, int exi
 		p2->p_limit->p_refcnt++;
 	}
 
+	/* Inherit STOPFORK and STOPEXEC flags */
+	p2->p_flag |= p1->p_flag & (P_STOPFORK | P_STOPEXEC);
+
 	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
 		p2->p_flag |= P_CONTROLT;
 	if (flags & FORK_PPWAIT)
 		p2->p_flag |= P_PPWAIT;
-	LIST_INSERT_AFTER(p1, p2, p_pglist);
 	p2->p_pptr = (flags & FORK_NOWAIT) ? initproc : p1;
-	LIST_INSERT_HEAD(&p2->p_pptr->p_children, p2, p_sibling);
 	LIST_INIT(&p2->p_children);
 
+	s = proclist_lock_write();
+	LIST_INSERT_AFTER(p1, p2, p_pglist);
+	LIST_INSERT_HEAD(&p2->p_pptr->p_children, p2, p_sibling);
+	proclist_unlock_write(s);
+
 #ifdef KTRACE
 	/*
 	 * Copy traceflag and tracefile if enabled.
@@ -390,89 +395,18 @@ fork1(struct lwp *l1, int flags, int exi
 	uvm_proc_fork(p1, p2, (flags & FORK_SHAREVM) ? TRUE : FALSE);
 
 	/*
-	 * Finish creating the child process.  It will return through a
-	 * different path later.
+	 * Finish creating the child process.
+	 * It will return through a different path later.
 	 */
 	newlwp(l1, p2, uaddr, inmem, 0, stack, stacksize, 
 	    (func != NULL) ? func : child_return, 
 	    arg, &l2);
 
-	/*
-	 * BEGIN PID ALLOCATION.
-	 */
+	/* Now safe for scheduler to see child process */
 	s = proclist_lock_write();
-
-	/*
-	 * Find an unused process ID.  We remember a range of unused IDs
-	 * ready to use (from nextpid+1 through pidchecked-1).
-	 */
-	nextpid++;
- retry:
-	/*
-	 * If the process ID prototype has wrapped around,
-	 * restart somewhat above 0, as the low-numbered procs
-	 * tend to include daemons that don't exit.
-	 */
-	if (nextpid >= PID_MAX) {
-		nextpid = PID_SKIP;
-		pidchecked = 0;
-	}
-	if (nextpid >= pidchecked) {
-		const struct proclist_desc *pd;
-
-		pidchecked = PID_MAX;
-		/*
-		 * Scan the process lists to check whether this pid
-		 * is in use.  Remember the lowest pid that's greater
-		 * than nextpid, so we can avoid checking for a while.
-		 */
-		pd = proclists;
- again:
-		LIST_FOREACH(tp, pd->pd_list, p_list) {
-			while (tp->p_pid == nextpid ||
-			    tp->p_pgrp->pg_id == nextpid ||
-			    tp->p_session->s_sid == nextpid) {
-				nextpid++;
-				if (nextpid >= pidchecked)
-					goto retry;
-			}
-			if (tp->p_pid > nextpid && pidchecked > tp->p_pid)
-				pidchecked = tp->p_pid;
-
-			if (tp->p_pgrp->pg_id > nextpid && 
-			    pidchecked > tp->p_pgrp->pg_id)
-				pidchecked = tp->p_pgrp->pg_id;
-
-			if (tp->p_session->s_sid > nextpid &&
-			    pidchecked > tp->p_session->s_sid)
-				pidchecked = tp->p_session->s_sid;
-		}
-
-		/*
-		 * If there's another list, scan it.  If we have checked
-		 * them all, we've found one!
-		 */
-		pd++;
-		if (pd->pd_list != NULL)
-			goto again;
-	}
-
-	/*
-	 * Put the proc on allproc before unlocking PID allocation
-	 * so that waiters won't grab it as soon as we unlock.
-	 */
-
 	p2->p_stat = SIDL;			/* protect against others */
-	p2->p_pid = nextpid;
 	p2->p_exitsig = exitsig;		/* signal for parent on exit */
-
 	LIST_INSERT_HEAD(&allproc, p2, p_list);
-
-	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
-
-	/*
-	 * END PID ALLOCATION.
-	 */
 	proclist_unlock_write(s);
 
 #ifdef SYSTRACE
@@ -502,14 +436,6 @@ fork1(struct lwp *l1, int flags, int exi
 		setrunqueue(l2);
 	}
 	SCHED_UNLOCK(s);
-
-	/*
-	 * Inherit STOPFORK and STOPEXEC flags 
-	 */
-	if (p1->p_flag & P_STOPFORK)
-		p2->p_flag |= P_STOPFORK;
-	if (p1->p_flag & P_STOPEXEC)
-		p2->p_flag |= P_STOPEXEC;
 
 	/*
 	 * Now can be swapped.
Index: kern/kern_lwp.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_lwp.c,v
retrieving revision 1.5
diff -u -p -r1.5 kern_lwp.c
--- kern/kern_lwp.c	2003/01/30 05:51:58	1.5
+++ kern/kern_lwp.c	2003/03/12 18:35:02
@@ -555,7 +555,7 @@ lwp_exit2(struct lwp *l)
 	LIST_INSERT_HEAD(&deadlwp, l, l_list);
 	simple_unlock(&deadproc_slock);
 
-	wakeup(&deadproc);
+	wakeup(&deadprocs);
 }
 
 /*
Index: kern/kern_proc.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_proc.c,v
retrieving revision 1.59
diff -u -p -r1.59 kern_proc.c
--- kern/kern_proc.c	2003/03/12 16:39:01	1.59
+++ kern/kern_proc.c	2003/03/12 18:35:08
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_proc.c,v 1.59 2003/03/12 16:39:01 dsl Exp $	*/
+/*	$NetBSD: kern_proc.c,v 1.58 2003/02/15 18:10:16 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -73,7 +73,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.59 2003/03/12 16:39:01 dsl Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.58 2003/02/15 18:10:16 dsl Exp $");
 
 #include "opt_kstack.h"
 
@@ -98,6 +98,8 @@ __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,
 #include <sys/sa.h>
 #include <sys/savar.h>
 
+static void pg_delete(pid_t);
+
 /*
  * Structure associated with user cacheing.
  */
@@ -113,10 +115,6 @@ u_long uihash;		/* size of hash table - 
 /*
  * Other process lists
  */
-struct pidhashhead *pidhashtbl;
-u_long pidhash;
-struct pgrphashhead *pgrphashtbl;
-u_long pgrphash;
 
 struct proclist allproc;
 struct proclist zombproc;	/* resources have been freed */
@@ -143,11 +141,43 @@ struct lock proclist_lock;
  * Locking of this proclist is special; it's accessed in a
  * critical section of process exit, and thus locking it can't
  * modify interrupt state.  We use a simple spin lock for this
- * proclist.  Processes on this proclist are also on zombproc;
- * we use the p_hash member to linkup to deadproc.
+ * proclist.  Processes on this proclist are also on zombproc.
  */
 struct simplelock deadproc_slock;
-struct proclist deadproc;	/* dead, but not yet undead */
+struct proc *deadprocs;	/* dead, but not yet undead */
+
+/*
+ * pid to proc lookup is done by indexing the pid_table array. 
+ * Since pid numbers are only allocated when an empty slot
+ * has been found, there is no need to search any lists ever.
+ * (an orphaned pgrp will lock the slot, a session will lock
+ * the pgrp with the same number.)
+ * If the table is too small it is reallocated with twice the
+ * previous size and the entries 'unzipped' into the two halves.
+ * A linked list of free entries is passed through the pt_proc
+ * field of 'free' items - set odd to be an invalid ptr.
+ */
+
+struct pid_table {
+	struct proc	*pt_proc;
+	struct pgrp	*pt_pgrp;
+}; 
+#if 1	/* strongly typed cast - should be a noop */
+static __inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; };
+#else
+#define p2u(p) ((uint)p)
+#endif 
+#define P_VALID(p) (!(p2u(p) & 1))
+#define P_NEXT(p) (p2u(p) >> 1)
+#define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1))
+
+static struct pid_table *pid_table;
+static uint pid_tbl_mask = (1 << 5) - 1;	/* table size 2^n */
+static uint pid_alloc_lim;	/* max we allocate before growing table */
+
+/* links through free slots - never empty! */
+static uint next_free_pt, last_free_pt;
+static pid_t pid_max = PID_MAX;		/* largest value we allocate */
 
 struct pool proc_pool;
 struct pool lwp_pool;
@@ -190,23 +220,36 @@ void
 procinit(void)
 {
 	const struct proclist_desc *pd;
+	int i;
 
 	for (pd = proclists; pd->pd_list != NULL; pd++)
 		LIST_INIT(pd->pd_list);
 
 	spinlockinit(&proclist_lock, "proclk", 0);
 
-	LIST_INIT(&deadproc);
 	simple_lock_init(&deadproc_slock);
 
+	pid_table = malloc((pid_tbl_mask + 1) * sizeof *pid_table,
+			    M_PROC, M_WAITOK);
+	/* Set free list running through table...
+	   Preset 'use count' to -1 so we allocate pid 1 next. */
+	for (i = 0; i <= pid_tbl_mask; i++) {
+		pid_table[i].pt_proc = P_FREE((~0 & ~pid_tbl_mask) + i + 1);
+		pid_table[i].pt_pgrp = 0;
+	}
+	/* slot 0 is just grabbed */
+	next_free_pt = 1;
+	/* Need to fix last entry. */
+	last_free_pt = pid_tbl_mask;
+	pid_table[last_free_pt].pt_proc = P_FREE(~0 & ~pid_tbl_mask);
+	/* point at which we grow table - to avoid reusing pids too often */
+	pid_alloc_lim = pid_tbl_mask - 1;
+
+
 	LIST_INIT(&alllwp);
 	LIST_INIT(&deadlwp);
 	LIST_INIT(&zomblwp);
 
-	pidhashtbl =
-	    hashinit(maxproc / 4, HASH_LIST, M_PROC, M_WAITOK, &pidhash);
-	pgrphashtbl =
-	    hashinit(maxproc / 4, HASH_LIST, M_PROC, M_WAITOK, &pgrphash);
 	uihashtbl =
 	    hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash);
 
@@ -350,14 +393,18 @@ pfind(pid_t pid)
 	struct proc *p;
 
 	proclist_lock_read();
-	LIST_FOREACH(p, PIDHASH(pid), p_hash)
-		if (p->p_pid == pid)
-			goto out;
- out:
+	p = pid_table[pid & pid_tbl_mask].pt_proc;
+	/* Only allow live processes to be found by pid. */
+	if (!P_VALID(p) || p->p_pid != pid ||
+	    !((1 << SACTIVE | 1 << SSTOP) & 1 << p->p_stat))
+		p = 0;
+
+	/* XXX MP - need to have a reference count... */
 	proclist_unlock_read();
-	return (p);
+	return p;
 }
 
+
 /*
  * Locate a process group by number
  */
@@ -366,54 +413,317 @@ pgfind(pid_t pgid)
 {
 	struct pgrp *pgrp;
 
-	LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash)
-		if (pgrp->pg_id == pgid)
-			return (pgrp);
-	return (NULL);
+	proclist_lock_read();
+	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
+	/*
+	 * Can't look up a pgrp that only exists because the session
+	 * hasn't died yet (traditional)
+	 */
+	if (pgrp == NULL || pgrp->pg_id != pgid
+	    || LIST_EMPTY(&pgrp->pg_members))
+		pgrp = 0;
+
+	/* XXX MP - need to have a reference count... */
+	proclist_unlock_read();
+	return pgrp;
 }
 
 /*
- * Move p to a new or existing process group (and session)
+ * Set entry for process 0
  */
-int
-enterpgrp(struct proc *p, pid_t pgid, int mksess)
+void
+proc0_insert(struct proc *p, struct lwp *l, struct pgrp *pgrp,
+	struct session *sess)
 {
-	struct pgrp *pgrp = pgfind(pgid);
+	int s;
 
+	LIST_INIT(&p->p_lwps);
+	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
+	p->p_nlwps = 1;
+
+	s = proclist_lock_write();
+
+	pid_table[0].pt_proc = p;
+	LIST_INSERT_HEAD(&allproc, p, p_list);
+	LIST_INSERT_HEAD(&alllwp, l, l_list);
+
+	p->p_pgrp = pgrp;
+	pid_table[0].pt_pgrp = pgrp;
+	LIST_INIT(&pgrp->pg_members);
+	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
+
+	pgrp->pg_session = sess;
+	sess->s_count = 1;
+	sess->s_sid = 0;
+	sess->s_leader = p;
+
+	proclist_unlock_write(s);
+}
+
+static void
+expand_pid_table(void)
+{
+	uint pt_size = pid_tbl_mask + 1;
+	struct pid_table *n_pt, *new_pt;
+	struct proc *proc;
+	struct pgrp *pgrp;
+	int i;
+	int s;
+	pid_t pid;
+
+	new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK);
+
+	s = proclist_lock_write();
+	if (pt_size != pid_tbl_mask + 1) {
+		/* Another process beat us to it... */
+		proclist_unlock_write(s);
+		FREE(new_pt, M_PROC);
+		return;
+	}
+	   
+	/*
+	 * Copy entries from old table into new one.
+	 * If 'pid' is 'odd' we need to place in the upper half,
+	 * even pid's to the lower half.
+	 * Free items stay in the low half so we don't have to
+	 * fixup the reference to them.
+	 * We stuff free items on the front of the freelist
+	 * because we can't write to unmodified entries.
+	 * Processing the table backwards maintains a semblance
+	 * of issuing pid numbers that increase with time.
+	 */
+	i = pt_size - 1;
+	n_pt = new_pt + i;
+	for (; ; i--, n_pt--) {
+		proc = pid_table[i].pt_proc;
+		pgrp = pid_table[i].pt_pgrp;
+		if (!P_VALID(proc)) {
+			/* Up 'use count' so that link is valid */
+			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
+			proc = P_FREE(pid);
+			if (pgrp)
+				pid = pgrp->pg_id;
+		} else
+			pid = proc->p_pid;
+		
+		/* Save entry in appropriate half of table */
+		n_pt[pid & pt_size].pt_proc = proc;
+		n_pt[pid & pt_size].pt_pgrp = pgrp;
+
+		/* Put other piece on start of free list */
+		pid = (pid ^ pt_size) & ~pid_tbl_mask;
+		n_pt[pid & pt_size].pt_proc =
+				    P_FREE((pid & ~pt_size) | next_free_pt);
+		n_pt[pid & pt_size].pt_pgrp = 0;
+		next_free_pt = i | (pid & pt_size);
+		if (i == 0)
+			break;
+	}
+
+	/* Switch tables */
+	n_pt = pid_table;
+	pid_table = new_pt;
+	pid_tbl_mask = pt_size * 2 - 1;
+
+	/*
+	 * pid_max starts as PID_MAX (= 30000), once we have 16384
+	 * allocated pids we need it to be larger!
+	 */
+	if (pid_tbl_mask > PID_MAX) {
+		pid_max = pid_tbl_mask * 2 + 1;
+		pid_alloc_lim |= pid_alloc_lim << 1;
+	} else
+		pid_alloc_lim <<= 1;	/* doubles number of free slots... */
+
+	proclist_unlock_write(s);
+	FREE(n_pt, M_PROC);
+}
+
+struct proc *
+proc_alloc(void)
+{
+	struct proc *p;
+	int s;
+	int nxt;
+	pid_t pid;
+	struct pid_table *pt;
+
+	p = pool_get(&proc_pool, PR_WAITOK);
+	p->p_stat = SIDL;			/* protect against others */
+
+	/* allocate next free pid */
+
+	for (;;expand_pid_table()) {
+		if (__predict_false(nprocs >= pid_alloc_lim))
+			/* ensure pids cycle through 2000+ values */
+			continue;
+		s = proclist_lock_write();
+		pt = &pid_table[next_free_pt];
 #ifdef DIAGNOSTIC
-	if (__predict_false(pgrp != NULL && mksess))	/* firewalls */
-		panic("enterpgrp: setsid into non-empty pgrp");
-	if (__predict_false(SESS_LEADER(p)))
-		panic("enterpgrp: session leader attempted setpgrp");
+		if (P_VALID(pt->pt_proc) || pt->pt_pgrp)
+			panic("proc_alloc: slot busy");
 #endif
-	if (pgrp == NULL) {
-		pid_t savepid = p->p_pid;
-		struct proc *np;
-		/*
-		 * new process group
-		 */
+		nxt = P_NEXT(pt->pt_proc);
+		if (nxt & pid_tbl_mask)
+			break;
+		/* Table full - expand (NB last entry not used....) */
+		proclist_unlock_write(s);
+	}
+
+	/* pid is 'saved use count' + 'size' + entry */
+	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
+	if ((uint)pid > (uint)pid_max)
+		pid &= pid_tbl_mask;
+	p->p_pid = pid;
+	next_free_pt = nxt & pid_tbl_mask;
+
+	/* Grab table slot */
+	pt->pt_proc = p;
+
+	proclist_unlock_write(s);
+
+	return p;
+}
+
+/*
+ * Free last resources of a process - called from proc_free (in kern_exit.c)
+ */
+void
+proc_free_mem(struct proc *p)
+{
+	int s;
+	pid_t pid = p->p_pid;
+	struct pid_table *pt;
+
+	s = proclist_lock_write();
+
+	pt = &pid_table[pid & pid_tbl_mask];
 #ifdef DIAGNOSTIC
-		if (__predict_false(p->p_pid != pgid))
-			panic("enterpgrp: new pgrp and pid != pgid");
+	if (pt->pt_proc != p)
+		panic("proc_free: pid_table mismatch, pid %x, proc %p",
+			pid, p);
 #endif
-		pgrp = pool_get(&pgrp_pool, PR_WAITOK);
-		if ((np = pfind(savepid)) == NULL || np != p) {
-			pool_put(&pgrp_pool, pgrp);
-			return (ESRCH);
-		}
-		if (mksess) {
-			struct session *sess;
+	/* save pid use count in slot */
+	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);
+
+	if (pt->pt_pgrp == NULL) {
+		/* link last freed entry onto ours */
+		pid &= pid_tbl_mask;
+		pt = &pid_table[last_free_pt];
+		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
+		last_free_pt = pid;
+	}
+
+	nprocs--;
+	proclist_unlock_write(s);
+
+	pool_put(&proc_pool, p);
+}
 
-			/*
-			 * new session
-			 */
-			MALLOC(sess, struct session *, sizeof(struct session),
+/*
+ * Move p to a new or existing process group (and session)
+ *
+ * If we are creating a new pgrp, the pgid should equal
+ * the calling process's pid.
+ * It is only valid to enter a process group that is in the session
+ * of the process.
+ * Also mksess should only be set if we are creating a process group.
+ *
+ * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the
+ * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid)
+ */
+int
+enterpgrp(struct proc *p, pid_t pgid, int mksess)
+{
+	struct pgrp *new_pgrp, *pgrp;
+	struct session *sess;
+	struct proc *curp = curproc;
+	pid_t pid = p->p_pid;
+	int rval;
+	int s;
+	pid_t pg_id = NO_PGID;
+
+	/* Allocate data areas we might need before doing any validity checks */
+	proclist_lock_read();		/* Because pid_table might change */
+	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
+		proclist_unlock_read();
+		new_pgrp = pool_get(&pgrp_pool, PR_WAITOK);
+	} else {
+		proclist_unlock_read();
+		new_pgrp = NULL;
+	}
+	if (mksess)
+		MALLOC(sess, struct session *, sizeof(struct session),
 			    M_SESSION, M_WAITOK);
-			if ((np = pfind(savepid)) == NULL || np != p) {
-				FREE(sess, M_SESSION);
-				pool_put(&pgrp_pool, pgrp);
-				return (ESRCH);
-			}
+	else
+		sess = NULL;
+
+	s = proclist_lock_write();
+	rval = EPERM;	/* most common error (to save typing) */
+
+	/* Check pgrp exists or can be created */
+	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
+	if (pgrp != NULL && pgrp->pg_id != pgid)
+		goto done;
+
+	/* Can only set another process under restricted circumstances. */
+	if (p != curp) {
+		/* must exist and be one of our children... */
+		if (p != pid_table[pid & pid_tbl_mask].pt_proc
+		    || !inferior(p, curp)) {
+			rval = ESRCH;
+			goto done;
+		}
+		/* ... in the same session... */
+		if (sess != NULL || p->p_session != curp->p_session)
+			goto done;
+		/* ... existing pgid must be in same session ... */
+		if (pgrp != NULL && pgrp->pg_session != p->p_session)
+			goto done;
+		/* ... and not done an exec. */
+		if (p->p_flag & P_EXEC) {
+			rval = EACCES;
+			goto done;
+		}
+	}
+
+	/* Changing the process group/session of a session
+	   leader is definitely off limits. */
+	if (SESS_LEADER(p)) {
+		if (sess == NULL && p->p_pgrp == pgrp)
+			/* unless it's a definite noop */
+			rval = 0;
+		goto done;
+	}
+
+	/* Can only create a process group with id of process */
+	if (pgrp == NULL && pgid != pid)
+		goto done;
+
+	/* Can only create a session if creating pgrp */
+	if (sess != NULL && pgrp != NULL)
+		goto done;
+
+	/* Check we allocated memory for a pgrp... */
+	if (pgrp == NULL && new_pgrp == NULL)
+		goto done;
+
+	/* Don't attach to 'zombie' pgrp */
+	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
+		goto done;
+
+	/* Expect to succeed now */
+	rval = 0;
+
+	if (pgrp == p->p_pgrp)
+		/* nothing to do */
+		goto done;
+
+	/* Ok all setup, link up required structures */
+	if (pgrp == NULL) {
+		pgrp = new_pgrp;
+		new_pgrp = 0;
+		if (sess != NULL) {
 			sess->s_sid = p->p_pid;
 			sess->s_leader = p;
 			sess->s_count = 1;
@@ -423,21 +733,24 @@ enterpgrp(struct proc *p, pid_t pgid, in
 			memcpy(sess->s_login, p->p_session->s_login,
 			    sizeof(sess->s_login));
 			p->p_flag &= ~P_CONTROLT;
-			pgrp->pg_session = sess;
-#ifdef DIAGNOSTIC
-			if (__predict_false(p != curproc))
-				panic("enterpgrp: mksession and p != curlwp");
-#endif
 		} else {
-			SESSHOLD(p->p_session);
-			pgrp->pg_session = p->p_session;
+			sess = p->p_pgrp->pg_session;
+			SESSHOLD(sess);
 		}
+		pgrp->pg_session = sess;
+		sess = 0;
+
 		pgrp->pg_id = pgid;
 		LIST_INIT(&pgrp->pg_members);
-		LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
+#ifdef DIAGNOSTIC
+		if (pid_table[pgid & pid_tbl_mask].pt_pgrp)
+			panic("enterpgrp: pgrp table slot in use");
+		if (p != curp)
+			panic("enterpgrp: mksession and p != curlwp");
+#endif
+		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
 		pgrp->pg_jobc = 0;
-	} else if (pgrp == p->p_pgrp)
-		return (0);
+	}
 
 	/*
 	 * Adjust eligibility of affected pgrps to participate in job control.
@@ -447,12 +760,28 @@ enterpgrp(struct proc *p, pid_t pgid, in
 	fixjobc(p, pgrp, 1);
 	fixjobc(p, p->p_pgrp, 0);
 
+	/* Move process to requested group */
 	LIST_REMOVE(p, p_pglist);
 	if (LIST_EMPTY(&p->p_pgrp->pg_members))
-		pgdelete(p->p_pgrp);
+		/* defer delete until we've dumped the lock */
+		pg_id = p->p_pgrp->pg_id;
 	p->p_pgrp = pgrp;
 	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
-	return (0);
+
+    done:
+	proclist_unlock_write(s);
+	if (sess != NULL)
+		free(sess, M_SESSION);
+	if (new_pgrp != NULL)
+		pool_put(&pgrp_pool, new_pgrp);
+	if (pg_id != NO_PGID)
+		pg_delete(pg_id);
+#ifdef DIAGNOSTIC
+	if (rval)
+		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
+			pid, pgid, mksess, curp->p_pid, rval);
+#endif
+	return rval;
 }
 
 /*
@@ -461,31 +790,109 @@ enterpgrp(struct proc *p, pid_t pgid, in
 int
 leavepgrp(struct proc *p)
 {
+	int s = proclist_lock_write();
+	struct pgrp *pgrp;
+	pid_t pg_id;
 
+	pgrp = p->p_pgrp;
 	LIST_REMOVE(p, p_pglist);
-	if (LIST_EMPTY(&p->p_pgrp->pg_members))
-		pgdelete(p->p_pgrp);
 	p->p_pgrp = 0;
-	return (0);
+	pg_id = LIST_EMPTY(&pgrp->pg_members) ? pgrp->pg_id : NO_PGID;
+	proclist_unlock_write(s);
+
+	if (pg_id != NO_PGID)
+		pg_delete(pg_id);
+	return 0;
 }
 
+/* Detach pgrp 'pg_id' from its pid_table slot and return it to pgrp_pool. */
+static void
+pg_free(pid_t pg_id)
+{
+	struct pgrp *pgrp;
+	struct pid_table *pt;
+	int s;
+
+	s = proclist_lock_write();
+	pt = &pid_table[pg_id & pid_tbl_mask];
+	pgrp = pt->pt_pgrp;
+#ifdef DIAGNOSTIC
+	if (!pgrp || pgrp->pg_id != pg_id || !LIST_EMPTY(&pgrp->pg_members))
+		panic("pg_free: process group absent or has members");
+#endif
+	pt->pt_pgrp = 0;
+
+	if (!P_VALID(pt->pt_proc)) {
+		/* orphaned pgrp, put slot onto free list */
+#ifdef DIAGNOSTIC
+		if (P_NEXT(pt->pt_proc) & pid_tbl_mask)
+			panic("pg_free: process slot on free list");
+#endif
+		/* chain the slot behind last_free_pt, then make it the last */
+		pg_id &= pid_tbl_mask;
+		pt = &pid_table[last_free_pt];
+		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
+		last_free_pt = pg_id;
+	}
+	proclist_unlock_write(s);
+
+	pool_put(&pgrp_pool, pgrp);
+}
+
 /*
  * delete a process group
  */
-void
-pgdelete(struct pgrp *pgrp)
+static void
+pg_delete(pid_t pg_id)
 {
+	struct pgrp *pgrp;
+	struct tty *ttyp;
+	struct session *ss;
+	int s;
+
+	s = proclist_lock_write();
+	pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
+	if (pgrp == NULL || pgrp->pg_id != pg_id ||
+	     !LIST_EMPTY(&pgrp->pg_members)) {
+		proclist_unlock_write(s);
+		return;
+	}
 
 	/* Remove reference (if any) from tty to this process group */
-	if (pgrp->pg_session->s_ttyp != NULL && 
-	    pgrp->pg_session->s_ttyp->t_pgrp == pgrp)
-		pgrp->pg_session->s_ttyp->t_pgrp = NULL;
-	LIST_REMOVE(pgrp, pg_hash);
-	SESSRELE(pgrp->pg_session);
-	pool_put(&pgrp_pool, pgrp);
+	ttyp = pgrp->pg_session->s_ttyp;
+	if (ttyp != NULL && ttyp->t_pgrp == pgrp)
+		ttyp->t_pgrp = NULL;
+
+	ss = pgrp->pg_session;
+	if (ss->s_sid == pgrp->pg_id) {
+		proclist_unlock_write(s);
+		SESSRELE(ss);
+		/* pgrp freed by sessdelete() if last reference */
+		return;
+	}
+
+	proclist_unlock_write(s);
+	pg_free(pg_id);
 }
 
 /*
+ * Delete session - called from SESSRELE when s_count becomes zero.
+ */
+void
+sessdelete(struct session *ss)
+{
+	/* We keep the pgrp with the same id as the session in
+	 * order to stop a process being given the same pid.
+	 * Since the pgrp holds a reference to the session, it
+	 * must be a 'zombie' pgrp by now.
+	 */
+
+	pg_free(ss->s_sid);
+
+	FREE(ss, M_SESSION);
+}
+
+/*
  * Adjust pgrp jobc counters when specified process changes process group.
  * We count the number of processes in each process group that "qualify"
  * the group for terminal job control (those with a parent in a different
@@ -551,7 +958,7 @@ orphanpg(struct pgrp *pg)
 	}
 }
 
-/* mark process as suid/sgid, reset some values do defaults */
+/* mark process as suid/sgid, reset some values to defaults */
 void
 p_sugid(struct proc *p)
 {
@@ -571,32 +978,45 @@ p_sugid(struct proc *p)
 	}
 }
 
-#ifdef DEBUG
+#ifdef DDB
+#include <ddb/db_output.h>
+void pidtbl_dump(void);
 void
-pgrpdump(void)
+pidtbl_dump(void)
 {
-	struct pgrp *pgrp;
+	struct pid_table *pt;
 	struct proc *p;
-	int i;
+	struct pgrp *pgrp;
+	int id;
 
-	for (i = 0; i <= pgrphash; i++) {
-		if ((pgrp = LIST_FIRST(&pgrphashtbl[i])) != NULL) {
-			printf("\tindx %d\n", i);
-			for (; pgrp != 0; pgrp = pgrp->pg_hash.le_next) {
-				printf("\tpgrp %p, pgid %d, sess %p, "
-				    "sesscnt %d, mem %p\n",
-				    pgrp, pgrp->pg_id, pgrp->pg_session,
-				    pgrp->pg_session->s_count,
-				    LIST_FIRST(&pgrp->pg_members));
-				LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
-					printf("\t\tpid %d addr %p pgrp %p\n", 
-					    p->p_pid, p, p->p_pgrp);
-				}
+	db_printf("pid table %p size %x, next %x, last %x\n",
+		pid_table, pid_tbl_mask+1,
+		next_free_pt, last_free_pt);
+	for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
+		p = pt->pt_proc;
+		if (!P_VALID(p) && !pt->pt_pgrp)
+			continue;
+		db_printf("  id %x: ", id);
+		if (P_VALID(p))
+			db_printf("proc %p id %x\n", p, p->p_pid);
+		else
+			db_printf("next %x use %x\n",
+				P_NEXT(p) & pid_tbl_mask,
+				P_NEXT(p) & ~pid_tbl_mask);
+		if ((pgrp = pt->pt_pgrp)) {
+			db_printf("\tpgrp %p, pgid %d, sess %p, sesscnt %d, mem %p\n",
+			    pgrp, pgrp->pg_id, pgrp->pg_session,
+			    pgrp->pg_session->s_count,
+			    pgrp->pg_members.lh_first);
+			for (p = pgrp->pg_members.lh_first; p != 0;
+			    p = p->p_pglist.le_next) {
+				db_printf("\t\tpid %d addr %p pgrp %p\n", 
+				    p->p_pid, p, p->p_pgrp);
 			}
 		}
 	}
 }
-#endif /* DEBUG */
+#endif /* DDB */
 
 #ifdef KSTACK_CHECK_MAGIC
 #include <sys/user.h>
Index: kern/kern_prot.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_prot.c,v
retrieving revision 1.76
diff -u -p -r1.76 kern_prot.c
--- kern/kern_prot.c	2003/03/05 18:42:19	1.76
+++ kern/kern_prot.c	2003/03/12 18:35:10
@@ -275,6 +275,8 @@ sys_setsid(struct lwp *l, void *v, regis
  * if pgid != pid
  * 	there must exist some pid in same session having pgid (EPERM)
  * pid must not be session leader (EPERM)
+ *
+ * Permission checks now in enterpgrp()
  */
 /* ARGSUSED */
 int
@@ -286,30 +288,21 @@ sys_setpgid(struct lwp *l, void *v, regi
 	} */ *uap = v;
 	struct proc *curp = l->l_proc;
 	struct proc *targp;			/* target process */
-	struct pgrp *pgrp;			/* target pgrp */
 
 	if (SCARG(uap, pgid) < 0)
-		return (EINVAL);
+		return EINVAL;
 
+	/* XXX MP - there is a horrid race here with targp exiting! */
 	if (SCARG(uap, pid) != 0 && SCARG(uap, pid) != curp->p_pid) {
-		if ((targp = pfind(SCARG(uap, pid))) == 0 ||
-		    !inferior(targp, curp))
-			return (ESRCH);
-		if (targp->p_session != curp->p_session)
-			return (EPERM);
-		if (targp->p_flag & P_EXEC)
-			return (EACCES);
+		targp = pfind(SCARG(uap, pid));
+		if (targp == NULL)
+			return ESRCH;
 	} else
 		targp = curp;
-	if (SESS_LEADER(targp))
-		return (EPERM);
+
 	if (SCARG(uap, pgid) == 0)
 		SCARG(uap, pgid) = targp->p_pid;
-	else if (SCARG(uap, pgid) != targp->p_pid)
-		if ((pgrp = pgfind(SCARG(uap, pgid))) == 0 ||
-		    pgrp->pg_session != curp->p_session)
-			return (EPERM);
-	return (enterpgrp(targp, SCARG(uap, pgid), 0));
+	return enterpgrp(targp, SCARG(uap, pgid), 0);
 }
 
 /*
Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.131
diff -u -p -r1.131 kern_sysctl.c
--- kern/kern_sysctl.c	2003/03/06 20:33:00	1.131
+++ kern/kern_sysctl.c	2003/03/12 18:35:18
@@ -388,7 +388,7 @@ kern_sysctl(int *name, u_int namelen, vo
 		error = sysctl_int(oldp, oldlenp, newp, newlen, &nmaxproc);
 
 		if (!error && newp) {
-			if (nmaxproc < 0 || nmaxproc >= PID_MAX - PID_SKIP)
+			if (nmaxproc < nprocs)
 				return (EINVAL);
 
 #ifdef __HAVE_CPU_MAXPROC
@@ -1789,7 +1789,7 @@ fill_eproc(struct proc *p, struct eproc 
 	if ((p->p_flag & P_CONTROLT) &&
 	    (tp = ep->e_sess->s_ttyp)) {
 		ep->e_tdev = tp->t_dev;
-		ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
+		ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
 		ep->e_tsess = tp->t_session;
 	} else
 		ep->e_tdev = NODEV;
@@ -1837,7 +1837,7 @@ fill_kproc2(struct proc *p, struct kinfo
 	ki->p_sid = p->p_session->s_sid;
 	ki->p__pgid = p->p_pgrp->pg_id;
 
-	ki->p_tpgid = NO_PID;	/* may be changed if controlling tty below */
+	ki->p_tpgid = NO_PGID;	/* may be changed if controlling tty below */
 
 	ki->p_uid = p->p_ucred->cr_uid;
 	ki->p_ruid = p->p_cred->p_ruid;
@@ -1853,7 +1853,7 @@ fill_kproc2(struct proc *p, struct kinfo
 	ki->p_jobc = p->p_pgrp->pg_jobc;
 	if ((p->p_flag & P_CONTROLT) && (tp = p->p_session->s_ttyp)) {
 		ki->p_tdev = tp->t_dev;
-		ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
+		ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
 		ki->p_tsess = PTRTOINT64(tp->t_session);
 	} else {
 		ki->p_tdev = NODEV;
Index: kern/sys_pipe.c
===================================================================
RCS file: /cvsroot/src/sys/kern/sys_pipe.c,v
retrieving revision 1.36
diff -u -p -r1.36 sys_pipe.c
--- kern/sys_pipe.c	2003/02/14 13:16:44	1.36
+++ kern/sys_pipe.c	2003/03/12 18:35:35
@@ -327,7 +327,6 @@ pipe_create(pipep, allockva)
 	PIPE_TIMESTAMP(&pipe->pipe_ctime);
 	pipe->pipe_atime = pipe->pipe_ctime;
 	pipe->pipe_mtime = pipe->pipe_ctime;
-	pipe->pipe_pgid = NO_PID;
 	simple_lock_init(&pipe->pipe_slock);
 	lockinit(&pipe->pipe_lock, PRIBIO | PCATCH, "pipelk", 0, 0);
 
@@ -390,7 +389,7 @@ pipeselwakeup(selp, sigp)
 		return;
 
 	pid = sigp->pipe_pgid;
-	if (pid == NO_PID || pid == 0)
+	if (pid == 0)
 		return;
 
 	if (pid < 0)
Index: kern/tty.c
===================================================================
RCS file: /cvsroot/src/sys/kern/tty.c,v
retrieving revision 1.149
diff -u -p -r1.149 tty.c
--- kern/tty.c	2003/02/17 22:23:14	1.149
+++ kern/tty.c	2003/03/12 18:35:45
@@ -894,7 +894,7 @@ ttioctl(struct tty *tp, u_long cmd, cadd
 	case TIOCGPGRP:			/* get pgrp of tty */
 		if (!isctty(p, tp))
 			return (ENOTTY);
-		*(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
+		*(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
 		break;
 	case TIOCGSID:			/* get sid of tty */
 		if (!isctty(p, tp))
-- 
David Laight: david@l8s.co.uk