Subject: new pid allocation code
To: tech-kern@netbsd.org
From: David Laight <david@l8s.co.uk>
List: tech-kern
Date: 03/11/2003 14:36:30
Hackers,

The code below implements a different pid allocation and proc/pgrp
lookup algorithm.

The main benefits are:
- pid and pgrp lookup (by id) doesn't require a search
- no dependency on MAXUSERS
- automatically scales well to large numbers of processes
- small data footprint for small systems
- ability to enumerate through all the processes without holding a lock
  for the entire duration, or having very messy locking rules
  (the allproc list and p_list fields could be deprecated later);
  see the sketch after this list.
- Largely MP clean
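
To illustrate the enumeration point above: with the table, a walker only
needs the lock across each probe, not for the whole scan.  A hypothetical
fragment against the declarations in the patch ('visit' is a placeholder,
not part of the change):

static void
for_each_proc(void (*visit)(struct proc *))
{
	uint i;
	struct proc *p;

	for (i = 0; i <= pid_tbl_mask; i++) {
		proclist_lock_read();
		p = pid_table[i].pt_proc;
		if (P_VALID(p) && p->p_stat != SIDL)
			visit(p);
		proclist_unlock_read();
	}
}

Since the bound is re-read on every pass, a table expansion in the middle
of the scan is tolerated; a process whose entry moves to the new upper
half may simply be visited twice.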

The basic idea is to ensure that you allocate a pid number which
references an empty slot in the lookup table.  To do this a FIFO freelist
is linked through the free slots of the lookup table.
To avoid reusing pid numbers, the top bits of the pid are incremented
each time the slot is reused (the last value is kept in the table).
If the table is getting full (ie a pid would be reused in a relatively
small number of forks), then the table size is doubled.
Orphaned pgrps correctly stop the pid being reused, orphaned sessions
keep the pgrp allocated.
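
To make the arithmetic concrete, here is a small user-space sketch of
the allocation scheme (illustrative only: the kernel packs the free
list link and the saved 'use count' into a single odd-tagged pt_proc
pointer, and grows the table rather than using a fixed 8 entries):

#include <stdio.h>

#define TBL_SIZE 8			/* the kernel starts at 32 and doubles */
#define TBL_MASK (TBL_SIZE - 1)

/* A free slot remembers the next free index and the top bits of the
   last pid issued from it; the kernel packs both into one odd value
   via the P_FREE/P_NEXT/P_VALID macros in the patch. */
struct slot {
	int	live;		/* non-zero: slot holds a process */
	unsigned next;		/* free: index of next free slot (0 = end) */
	unsigned last_hi;	/* free: pid & ~TBL_MASK of the last use */
};

static struct slot table[TBL_SIZE];
static unsigned next_free = 1;	/* slot 0 is reserved for proc 0 */
static unsigned last_free = TBL_MASK;

static unsigned
pid_alloc(void)
{
	struct slot *s;
	unsigned pid;

	if (next_free == 0)
		return 0;	/* list empty - the kernel expands the table here */
	s = &table[next_free];
	/* new pid = saved top bits + one table size + slot index,
	   so every reuse of a slot bumps the pid by TBL_SIZE */
	pid = s->last_hi + TBL_SIZE + next_free;
	s->live = 1;
	next_free = s->next;
	return pid;
}

static void
pid_free(unsigned pid)
{
	unsigned i = pid & TBL_MASK;	/* a pid's low bits are its slot */

	table[i].live = 0;
	table[i].last_hi = pid & ~TBL_MASK;
	table[i].next = 0;		/* new tail of the FIFO */
	table[last_free].next = i;	/* append - delays reuse of the slot */
	last_free = i;
}

int
main(void)
{
	unsigned i;

	/* Free list runs 1 -> 2 -> ... -> 7.  Presetting the saved top
	   bits to -TBL_SIZE makes the first pids come out as 1, 2, 3... */
	for (i = 0; i < TBL_SIZE; i++) {
		table[i].next = i + 1;
		table[i].last_hi = 0u - TBL_SIZE;
	}
	table[TBL_MASK].next = 0;	/* fix the last entry, as procinit() does */

	for (i = 0; i < 5; i++)
		printf("allocated pid %u\n", pid_alloc());	/* 1 2 3 4 5 */
	pid_free(3);
	for (i = 0; i < 3; i++)
		printf("allocated pid %u\n", pid_alloc());	/* 6 7 11 */
	return 0;
}

Note that pid 3's slot comes back as pid 11 (3 + TBL_SIZE), and only
after the older free slots have been used: the FIFO plus the counter in
the top bits is what delays pid reuse, and the kernel doubles the table
before it would be forced to recycle recent pids.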

Below is the main part of the change; there are other bits lurking
in fork and exit.  If people think this code is ok, I'll sort out a
full diff against 'current' (and fix it for sparc64).

(I've been running this code for months!)

	David

Index: kern_proc.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_proc.c,v
retrieving revision 1.58
diff -u -p -r1.58 kern_proc.c
--- kern_proc.c	2003/02/15 18:10:16	1.58
+++ kern_proc.c	2003/03/11 10:41:22
@@ -98,29 +98,62 @@ __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,
 #include <sys/sa.h>
 #include <sys/savar.h>
 
+static void pg_delete(pid_t pg_id);
+
 /*
  * Structure associated with user cacheing.
  */
 struct uidinfo {
 	LIST_ENTRY(uidinfo) ui_hash;
 	uid_t	ui_uid;
 	long	ui_proccnt;
 };
 #define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
 LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
 u_long uihash;		/* size of hash table - 1 */
 
 /*
  * Other process lists
  */
-struct pidhashhead *pidhashtbl;
-u_long pidhash;
-struct pgrphashhead *pgrphashtbl;
-u_long pgrphash;
 
 struct proclist allproc;
 struct proclist zombproc;	/* resources have been freed */
 
+/* pid to proc lookup is done by indexing the pid_table array.
+   Since pid numbers are only allocated when an empty slot
+   has been found, there is no need to search any lists ever.
+   (an orphaned pgrp will lock the slot, a session will lock
+   the pgrp with the same number.)
+   If the table is too small it is reallocated with twice the
+   previous size and the entries 'unzipped' into the two halves.
+   A linked list of free entries is passed through the pt_proc
+   field of 'free' items - set odd to be an invalid ptr. */
+
+struct pid_table {
+	struct proc	*pt_proc;
+	struct pgrp	*pt_pgrp;
+};
+#if 1	/* strongly typed cast - should be a noop */
+static __inline intptr_t p2u(struct proc *p) { return (intptr_t)p; }
+#else
+#define p2u(p) ((intptr_t)p)
+#endif
+#define P_VALID(p) (!(p2u(p) & 1))
+#define P_NEXT(p) (p2u(p) >> 1)
+#define P_FREE(pid) ((struct proc *)((pid) << 1 | 1))
+
+static struct pid_table *pid_table;
+static uint pid_tbl_mask = (1 << 5) - 1;	/* table size 2^n */
+static uint pid_alloc_lim;	/* max we allocate before growing table */
+
+/* links through free slots - never empty! */
+static uint next_free_pt, last_free_pt;
+static pid_t pid_max = PID_MAX;	/* largest value we allocate */
 
 /*
  * Process list locking:
@@ -144,15 +177,13 @@ struct lock proclist_lock;
  * critical section of process exit, and thus locking it can't
  * modify interrupt state.  We use a simple spin lock for this
  * proclist.  Processes on this proclist are also on zombproc;
- * we use the p_hash member to linkup to deadproc.
  */
 struct simplelock deadproc_slock;
-struct proclist deadproc;	/* dead, but not yet undead */
+struct proc *deadprocs;	/* dead, but not yet undead */
 
 struct pool proc_pool;
 struct pool lwp_pool;
 struct pool lwp_uc_pool;
 struct pool pcred_pool;
 struct pool plimit_pool;
 struct pool pstats_pool;
 struct pool pgrp_pool;
@@ -187,29 +218,39 @@ void pgrpdump __P((void));
  * Initialize global process hashing structures.
  */
 void
-procinit()
+procinit(void)
 {
 	const struct proclist_desc *pd;
+	int i;
 
 	for (pd = proclists; pd->pd_list != NULL; pd++)
 		LIST_INIT(pd->pd_list);
 
 	spinlockinit(&proclist_lock, "proclk", 0);
 
-	LIST_INIT(&deadproc);
 	simple_lock_init(&deadproc_slock);
+	simple_lock_init(&uidlist_slock);
+
+	MALLOC(pid_table, struct pid_table *,
+		(pid_tbl_mask + 1) * sizeof *pid_table, M_PROC, M_WAITOK);
+	/* Set free list running through table...
+	   Preset 'use count' to -1 so we allocate pid 1 next. */
+	for (i = 0; i <= pid_tbl_mask; i++) {
+		pid_table[i].pt_proc = P_FREE((~0 & ~pid_tbl_mask) + i + 1);
+		pid_table[i].pt_pgrp = 0;
+	}
+	/* slot 0 is just grabbed */
+	next_free_pt = 1;
+	/* Need to fix the last entry. */
+	last_free_pt = pid_tbl_mask;
+	pid_table[last_free_pt].pt_proc = P_FREE(~0 & ~pid_tbl_mask);
+	/* point at which we grow table - to avoid reusing pids too often */
+	pid_alloc_lim = pid_tbl_mask - 1;
 
 	LIST_INIT(&alllwp);
 	LIST_INIT(&deadlwp);
 	LIST_INIT(&zomblwp);
 
-	pidhashtbl =
-	    hashinit(maxproc / 4, HASH_LIST, M_PROC, M_WAITOK, &pidhash);
-	pgrphashtbl =
-	    hashinit(maxproc / 4, HASH_LIST, M_PROC, M_WAITOK, &pgrphash);
-	uihashtbl =
-	    hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash);
-
 	pool_init(&proc_pool, sizeof(struct proc), 0, 0, 0, "procpl",
 	    &pool_allocator_nointr);
 	pool_init(&lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl",
@@ -218,12 +259,11 @@ procinit()
 	    &pool_allocator_nointr);
 	pool_init(&pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl",
 	    &pool_allocator_nointr);
 	pool_init(&pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl",
 	    &pool_allocator_nointr);
 	pool_init(&plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl",
 	    &pool_allocator_nointr);
 	pool_init(&pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl",
 	    &pool_allocator_nointr);
 	pool_init(&rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl",
 	    &pool_allocator_nointr);
 	pool_init(&ras_pool, sizeof(struct ras), 0, 0, 0, "raspl",
@@ -350,16 +510,18 @@ inferior(p, q)
  * Locate a process by number
  */
 struct proc *
-pfind(pid)
-	pid_t pid;
+pfind(pid_t pid)
 {
 	struct proc *p;
 
 	proclist_lock_read();
-	LIST_FOREACH(p, PIDHASH(pid), p_hash)
-		if (p->p_pid == pid)
-			goto out;
- out:
+	p = pid_table[pid & pid_tbl_mask].pt_proc;
+	/* Only allow live processes to be found by pid. */
+	if (!P_VALID(p) || p->p_pid != pid ||
+	    !((1 << SACTIVE | 1 << SSTOP) & 1 << p->p_stat))
+		p = 0;
+
+	/* XXX MP - need to have a reference count... */
 	proclist_unlock_read();
 	return (p);
 }
@@ -368,62 +530,315 @@ pfind(pid)
  * Locate a process group by number
  */
 struct pgrp *
-pgfind(pgid)
-	pid_t pgid;
+pgfind(pid_t pgid)
 {
 	struct pgrp *pgrp;
+
+	proclist_lock_read();
+	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
+	if (pgrp == NULL || pgrp->pg_id != pgid
+	    /* Can't look up a pgrp that only exists because the session
+	       hasn't died yet (traditional) */
+	    || LIST_EMPTY(&pgrp->pg_members))
+		pgrp = 0;
 
-	LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash)
-		if (pgrp->pg_id == pgid)
-			return (pgrp);
-	return (NULL);
+	/* XXX MP - need to have a reference count... */
+	proclist_unlock_read();
+	return pgrp;
 }
 
+/*
+ * Set entry for process 0
+ */
+void
+set_proc_0(struct proc *p, struct lwp *l, struct pgrp *pgrp,
+	struct session *sess)
+{
+	int s;
+
+	LIST_INIT(&p->p_lwps);
+	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
+	p->p_nlwps = 1;
+
+	s = proclist_lock_write();
+
+	pid_table[0].pt_proc = p;
+	LIST_INSERT_HEAD(&allproc, p, p_list);
+	LIST_INSERT_HEAD(&alllwp, l, l_list);
+
+	p->p_pgrp = pgrp;
+	pid_table[0].pt_pgrp = pgrp;
+	LIST_INIT(&pgrp->pg_members);
+	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
+
+	pgrp->pg_session = sess;
+	sess->s_count = 1;
+	sess->s_sid = 0;
+	sess->s_leader = p;
+
+	proclist_unlock_write(s);
+}
+
+static void
+expand_pid_table(void)
+{
+	uint pt_size = pid_tbl_mask + 1;
+	struct pid_table *n_pt, *new_pt;
+	struct proc *proc;
+	struct pgrp *pgrp;
+	int i;
+	int s;
+	pid_t pid;
+
+	MALLOC(new_pt, struct pid_table *, pt_size * 2 * sizeof *new_pt,
+			    M_PROC, M_WAITOK);
+
+	s = proclist_lock_write();
+	if (pt_size != pid_tbl_mask + 1) {
+		/* Another process beat us to it... */
+		proclist_unlock_write(s);
+		FREE(new_pt, M_PROC);
+		return;
+	}
+
+	/* Copy entries from old table into new one.
+	   If 'pid' is 'odd' we need to place it in the upper half,
+	   even pids go in the lower half.
+	   Free items stay in the low half so we don't have to
+	   fix up the references to them.
+	   We stuff free items on the front of the freelist
+	   because we can't write to unmodified entries.
+	   Processing the table backwards maintains a semblance
+	   of issuing pid numbers that increase with time. */
+	i = pt_size - 1;
+	n_pt = new_pt + i;
+	for (; ; i--, n_pt--) {
+		proc = pid_table[i].pt_proc;
+		pgrp = pid_table[i].pt_pgrp;
+		if (!P_VALID(proc)) {
+			/* Up 'use count' so that link is valid */
+			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
+			proc = P_FREE(pid);
+			if (pgrp)
+				pid = pgrp->pg_id;
+		} else
+			pid = proc->p_pid;
+		
+		/* Save entry in appropriate half of table */
+		n_pt[pid & pt_size].pt_proc = proc;
+		n_pt[pid & pt_size].pt_pgrp = pgrp;
+
+		/* Put other piece on start of free list */
+		pid = (pid ^ pt_size) & ~pid_tbl_mask;
+		n_pt[pid & pt_size].pt_proc =
+				    P_FREE((pid & ~pt_size) | next_free_pt);
+		n_pt[pid & pt_size].pt_pgrp = 0;
+		next_free_pt = i | (pid & pt_size);
+		if (i == 0)
+			break;
+	}
+
+	/* Switch tables */
+	n_pt = pid_table;
+	pid_table = new_pt;
+	pid_tbl_mask = pt_size * 2 - 1;
+
+	/* pid_max starts as PID_MAX (= 30000); once we have allocated
+	   16384 pids we need it to be larger! */
+	if (pid_tbl_mask > PID_MAX) {
+		pid_max = pid_tbl_mask * 2 + 1;
+		pid_alloc_lim |= pid_alloc_lim << 1;
+	} else
+		pid_alloc_lim <<= 1;	/* doubles number of free slots... */
+
+	proclist_unlock_write(s);
+	FREE(n_pt, M_PROC);
+}
+
+struct proc *
+proc_alloc(void)
+{
+	struct proc *p;
+	int s;
+	int nxt;
+	pid_t pid;
+	struct pid_table *pt;
+
+	p = pool_get(&proc_pool, PR_WAITOK);
+	p->p_stat = SIDL;			/* protect against others */
+
+	/* allocate next free pid */
+
+	for (;;expand_pid_table()) {
+		if (__predict_false(nprocs >= pid_alloc_lim))
+			/* ensure pids cycle through 2000+ values */
+			continue;
+		s = proclist_lock_write();
+		pt = &pid_table[next_free_pt];
+#ifdef DIAGNOSTIC
+		if (P_VALID(pt->pt_proc) || pt->pt_pgrp)
+			panic("proc_alloc: slot busy");
+#endif
+		nxt = P_NEXT(pt->pt_proc);
+		if (nxt & pid_tbl_mask)
+			break;
+		/* Table full - expand (NB last entry not used....) */
+		proclist_unlock_write(s);
+	}
+
+	/* pid is 'saved use count' + 'size' + entry */
+	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
+	if (pid > pid_max)
+		pid &= pid_tbl_mask;
+	p->p_pid = pid;
+	next_free_pt = nxt & pid_tbl_mask;
+
+	/* Grab table slot */
+	pt->pt_proc = p;
+
+	proclist_unlock_write(s);
+
+	return p;
+}
+
+/* Free last resources of a process - from proc_free (in kern_exit.c) */
+
+void
+proc_free_mem(struct proc *p)
+{
+	int s;
+	pid_t pid = p->p_pid;
+	struct pid_table *pt;
+
+	s = proclist_lock_write();
+
+	pt = &pid_table[pid & pid_tbl_mask];
+#ifdef DIAGNOSTIC
+	if (pt->pt_proc != p)
+		panic("proc_free: pid_table mismatch, pid %x, proc %p",
+			pid, p);
+#endif
+	/* save pid use count in slot */
+	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);
+
+	if (pt->pt_pgrp == NULL) {
+		/* link last freed entry onto ours */
+		pid &= pid_tbl_mask;
+		pt = &pid_table[last_free_pt];
+		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
+		last_free_pt = pid;
+	}
+
+	nprocs--;
+	proclist_unlock_write(s);
+
+	pool_put(&proc_pool, p);
+}
+
- ##### the top of the old enterpgrp ####
+/*
+ * Move p to a new or existing process group (and session)
+ *
+ * If we are creating a new pgrp, the pgid should equal
+ * the calling process's pid.
+ * It is only valid to enter a process group that is in the session
+ * of the process.
+ * Also, mksess should only be set if we are creating a process group.
+ *
+ * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the
+ * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid)
+ */
+int
+enterpgrp(struct proc *p, pid_t pgid, int mksess)
+{
+	struct pgrp *new_pgrp, *pgrp;
+	struct session *sess;
+	struct proc *curp = curproc;
+	pid_t pid = p->p_pid;
+	int rval;
+	int s;
+	pid_t pg_id = NO_PGID;
+
+	/* Allocate data areas we might need before doing any validity checks */
+	proclist_lock_read();		/* Because pid_table might change */
+	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
+		proclist_unlock_read();
+		new_pgrp = pool_get(&pgrp_pool, PR_WAITOK);
+	} else {
+		proclist_unlock_read();
+		new_pgrp = NULL;
+	}
+	if (mksess)
+		MALLOC(sess, struct session *, sizeof(struct session),
+			    M_SESSION, M_WAITOK);
+	else
+		sess = NULL;
+
+	s = proclist_lock_write();
+	rval = EPERM;	/* most common error (to save typing) */
+
+	/* Check pgrp exists or can be created */
+	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
+	if (pgrp != NULL && pgrp->pg_id != pgid)
+		goto done;
+
+	/* Can only set another process under restricted circumstances. */
+	if (p != curp) {
+		/* must exist and be one of our children... */
+		if (p != pid_table[pid & pid_tbl_mask].pt_proc
+		    || !inferior(p, curp)) {
+			rval = ESRCH;
+			goto done;
+		}
+		/* ... in the same session... */
+		if (sess != NULL || p->p_session != curp->p_session)
+			goto done;
+		/* ... existing pgid must be in same session ... */
+		if (pgrp != NULL && pgrp->pg_session != p->p_session)
+			goto done;
+		/* ... and not done an exec. */
+		if (p->p_flag & P_EXEC) {
+			rval = EACCES;
+			goto done;
+		}
+	}
+
+	/* Changing the process group/session of a session
+	   leader is definitely off limits. */
+	if (SESS_LEADER(p)) {
+		if (sess == NULL && p->p_pgrp == pgrp)
+			/* unless it's a definite noop */
+			rval = 0;
+		goto done;
+	}
+
+	/* Can only create a process group with id of process */
+	if (pgrp == NULL && pgid != pid)
+		goto done;
+
+	/* Can only create a session if creating pgrp */
+	if (sess != NULL && pgrp != NULL)
+		goto done;
+
+	/* Check we allocated memory for a pgrp... */
+	if (pgrp == NULL && new_pgrp == NULL)
+		goto done;
+
+	/* Don't attach to 'zombie' pgrp */
+	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
+		goto done;
+
+	/* Expect to succeed now */
+	rval = 0;
+
+	if (pgrp == p->p_pgrp)
+		/* nothing to do */
+		goto done;
+
+	/* Ok all setup, link up required structures */
+	if (pgrp == NULL) {
+		pgrp = new_pgrp;
+		new_pgrp = 0;
+		if (sess != NULL) {
 			sess->s_sid = p->p_pid;
 			sess->s_leader = p;
 			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
 			memcpy(sess->s_login, p->p_session->s_login,
 			    sizeof(sess->s_login));
 			p->p_flag &= ~P_CONTROLT;
-			pgrp->pg_session = sess;
-#ifdef DIAGNOSTIC
-			if (__predict_false(p != curproc))
-				panic("enterpgrp: mksession and p != curlwp");
-#endif
 		} else {
-			SESSHOLD(p->p_session);
-			pgrp->pg_session = p->p_session;
+			sess = p->p_pgrp->pg_session;
+			SESSHOLD(sess);
 		}
+		pgrp->pg_session = sess;
+		sess = 0;
+
 		pgrp->pg_id = pgid;
 		LIST_INIT(&pgrp->pg_members);
-		LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
+#ifdef DIAGNOSTIC
+		if (pid_table[pgid & pid_tbl_mask].pt_pgrp)
+			panic("enterpgrp: pgrp table slot in use");
+		if (mksess && p != curp)
+			panic("enterpgrp: mksession and p != curproc");
+#endif
+		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
 		pgrp->pg_jobc = 0;
-	} else if (pgrp == p->p_pgrp)
-		return (0);
+	}
 
 	/*
 	 * Adjust eligibility of affected pgrps to participate in job control.
 	 * Increment eligibility counts before decrementing, otherwise we
 	 * could reach 0 spuriously during the first call.
 	 */
 	fixjobc(p, pgrp, 1);
 	fixjobc(p, p->p_pgrp, 0);
 
+	/* Move process to requested group */
 	LIST_REMOVE(p, p_pglist);
 	if (LIST_EMPTY(&p->p_pgrp->pg_members))
-		pgdelete(p->p_pgrp);
+		/* defer delete until we've dumped the lock */
+		pg_id = p->p_pgrp->pg_id;
 	p->p_pgrp = pgrp;
 	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
-	return (0);
+
+    done:
+	proclist_unlock_write(s);
+	if (sess != NULL)
+		free(sess, M_SESSION);
+	if (new_pgrp != NULL)
+		pool_put(&pgrp_pool, new_pgrp);
+	if (pg_id != NO_PGID)
+		pg_delete(pg_id);
+#ifdef DIAGNOSTIC
+	if (rval)
+		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
+			pid, pgid, mksess, curp->p_pid, rval);
+#endif
+	return rval;
 }
 
+
 /*
  * remove process from process group
  */
 int
-leavepgrp(p)
-	struct proc *p;
+leavepgrp(struct proc *p)
 {
+	struct pgrp *pgrp;
+	pid_t pg_id;
+	int s;
+
+	s = proclist_lock_write();
 
+	pgrp = p->p_pgrp;
 	LIST_REMOVE(p, p_pglist);
-	if (LIST_EMPTY(&p->p_pgrp->pg_members))
-		pgdelete(p->p_pgrp);
 	p->p_pgrp = 0;
-	return (0);
+	pg_id = LIST_EMPTY(&pgrp->pg_members) ? NO_PGID : pgrp->pg_id;
+	proclist_unlock_write(s);
+
+	if (pg_id != NO_PGID)
+		pg_delete(pg_id);
+	return 0;
 }
 
+static void
+pg_free(pid_t pg_id)
+{
+	struct pgrp *pgrp;
+	struct pid_table *pt;
+	int s;
+
+	s = proclist_lock_write();
+	pt = &pid_table[pg_id & pid_tbl_mask];
+	pgrp = pt->pt_pgrp;
+#ifdef DIAGNOSTIC
+	if (!pgrp || pgrp->pg_id != pg_id
+	    || !LIST_EMPTY(&pgrp->pg_members))
+		panic("pg_free: process group absent or has members");
+#endif
+	pt->pt_pgrp = 0;
+
+	if (!P_VALID(pt->pt_proc)) {
+		/* orphaned pgrp, put slot onto free list */
+#ifdef DIAGNOSTIC
+		if (P_NEXT(pt->pt_proc) & pid_tbl_mask)
+			panic("pg_free: process slot on free list");
+#endif
+
+		pg_id &= pid_tbl_mask;
+		pt = &pid_table[last_free_pt];
+		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
+		last_free_pt = pg_id;
+	}
+	proclist_unlock_write(s);
+
+	pool_put(&pgrp_pool, pgrp);
+}
+
 /*
  * delete a process group
  */
-void
-pgdelete(pgrp)
-	struct pgrp *pgrp;
+static void
+pg_delete(pid_t pg_id)
 {
+	struct pgrp *pgrp;
+	struct tty *ttyp;
+	struct session *ss;
+	int s;
+
+	s = proclist_lock_write();
+	pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
+	if (pgrp == NULL || pgrp->pg_id != pg_id
+	    || !LIST_EMPTY(&pgrp->pg_members)) {
+		proclist_unlock_write(s);
+		return;
+	}
 
 	/* Remove reference (if any) from tty to this process group */
-	if (pgrp->pg_session->s_ttyp != NULL && 
-	    pgrp->pg_session->s_ttyp->t_pgrp == pgrp)
-		pgrp->pg_session->s_ttyp->t_pgrp = NULL;
-	LIST_REMOVE(pgrp, pg_hash);
-	SESSRELE(pgrp->pg_session);
-	pool_put(&pgrp_pool, pgrp);
+	ttyp = pgrp->pg_session->s_ttyp;
+	if (ttyp != NULL && ttyp->t_pgid == pg_id)
+		ttyp->t_pgid = NO_PGID;
+
+	ss = pgrp->pg_session;
+	if (ss->s_sid == pgrp->pg_id) {
+		proclist_unlock_write(s);
+		SESSRELE(ss);
+		/* pgrp freed by sessdelete() if last reference */
+		return;
+	}
+
+	proclist_unlock_write(s);
+	pg_free(pg_id);
 }
 
+/* Delete session - called from SESSRELE when s_count becomes zero */
+
+void
+sessdelete(struct session *ss)
+{
+	/*
+	 * We keep the pgrp with the same id as the session in
+	 * order to stop a process being given the same pid.
+	 * Since the pgrp holds a reference to the session, it
+	 * must be a 'zombie' pgrp by now.
+	 */
+
+	pg_free(ss->s_sid);
+
+	FREE(ss, M_SESSION);
+}
+
 /*
  * Adjust pgrp jobc counters when specified process changes process group.
  * We count the number of processes in each process group that "qualify"

-- 
David Laight: david@l8s.co.uk