Subject: SysV SHM locking and reallocating support
To: None <tech-kern@netbsd.org>
From: Mindaugas <unex@linija.org>
List: tech-kern
Date: 09/24/2006 01:31:54
This is a multi-part message in MIME format.

--Multipart=_Sun__24_Sep_2006_01_31_54_+0300_scRGQQD4CNIi=gsy
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit

Hello,

attached is a patch that adds support for locking SysV IPC shared memory
into physical memory via the shmctl(2) SHM_LOCK/SHM_UNLOCK commands.
These commands are not defined in the standard, but exist in systems
like Solaris, Linux, HP-UX and probably others. There is also a sysctl
parameter, kern.ipc.shm_use_phys, inspired by FreeBSD.
For memory locking I have used uvm_map_pageable(), but I am not sure
whether this is OK. It needs testing. Please review the patch and
comment on it.

The patch also makes shmmni, shmseg and shmmaxpgs changeable via sysctl
nodes. As far as I have tested, it works fine.
There is an XXX comment in shmrealloc() where, I think, interrupt
locking is needed. I am also not sure what locking is required when
sysctl parameters are changed.
Another question: should we set some upper limits (calculated from the
amount of RAM?) for the shmmaxpgs and shmmni parameters (marked with XXX
comments in the sysctl handler functions)?

In the attached patch, there is a new kern.ipc node for the SHM
parameters mentioned above. In my opinion, it would be structurally
better to move all SysV IPC parameters under the kern.ipc node,
including the kern.sysvmsg, kern.sysvsem and kern.sysvshm parameters.
In the patch I experimentally moved kern.sysvipc_info to
kern.ipc.sysvipc_info (this needs only a few changes in ipcs and the
emulation code). Also, wouldn't it be better to move the initialization
of these parameters from init_sysctl.c to sysv_ipc.c?
What are your thoughts on these structural sysctl changes?

Finally, is there anything fundamentally wrong with this patch and these
changes? If not, I will finish it.

Thanks.

P.S. Sorry if my questions stem from poor knowledge of NetBSD internals.

-- 
Best regards,
Mindaugas

--Multipart=_Sun__24_Sep_2006_01_31_54_+0300_scRGQQD4CNIi=gsy
Content-Type: application/octet-stream;
 name="sysv.diff"
Content-Disposition: attachment;
 filename="sysv.diff"
Content-Transfer-Encoding: quoted-printable

Index: kern/init_sysctl.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/sys/kern/init_sysctl.c,v
retrieving revision 1.81
diff -u -r1.81 init_sysctl.c
--- kern/init_sysctl.c	26 Jul 2006 09:33:57 -0000	1.81
+++ kern/init_sysctl.c	23 Sep 2006 21:24:38 -0000
@@ -598,10 +598,16 @@
 #if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
 	sysctl_createv(clog, 0, NULL, NULL,
 		       CTLFLAG_PERMANENT,
+		       CTLTYPE_NODE, "ipc",
+		       SYSCTL_DESCR("SysV IPC options"),
+		       NULL, 0, NULL, 0,
+		       CTL_KERN, KERN_SYSVIPC, CTL_EOL);
+	sysctl_createv(clog, 0, NULL, NULL,
+		       CTLFLAG_PERMANENT,
 		       CTLTYPE_STRUCT, "sysvipc_info",
 		       SYSCTL_DESCR("System V style IPC information"),
 		       sysctl_kern_sysvipc, 0, NULL, 0,
-		       CTL_KERN, KERN_SYSVIPC_INFO, CTL_EOL);
+		       CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_INFO, CTL_EOL);
 #endif /* SYSVMSG || SYSVSEM || SYSVSHM */
 	sysctl_createv(clog, 0, NULL, NULL,
 		       CTLFLAG_PERMANENT,
Index: kern/sysv_shm.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/sys/kern/sysv_shm.c,v
retrieving revision 1.89
diff -u -r1.89 sysv_shm.c
--- kern/sysv_shm.c	23 Jul 2006 22:06:11 -0000	1.89
+++ kern/sysv_shm.c	23 Sep 2006 21:24:39 -0000
@@ -112,7 +112,7 @@
 	int shmid;
 };
=20
-static int	shm_last_free, shm_committed;
+static int	shm_last_free, shm_committed, shm_curmni, shm_use_phys =3D 0;
=20
 static POOL_INIT(shmmap_entry_pool, sizeof(struct shmmap_entry), 0, 0, 0,
     "shmmp", &pool_allocator_nointr);
@@ -353,10 +353,17 @@
 	error =3D uvm_map(&p->p_vmspace->vm_map, &attach_va, size,
 	    uobj, 0, 0,
 	    UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
-	if (error) {
-		(*uobj->pgops->pgo_detach)(uobj);
-		return error;
+	if (error)
+		goto error;
+
+	/* Lock the memory */
+	if (shm_use_phys) {
+		error =3D uvm_map_pageable(&p->p_vmspace->vm_map, attach_va,
+				attach_va + size, FALSE, 0);
+		if (error)
+			goto error;
 	}
+
 	shmmap_se =3D pool_get(&shmmap_entry_pool, PR_WAITOK);
 	shmmap_se->va =3D attach_va;
 	shmmap_se->shmid =3D SCARG(uap, shmid);
@@ -372,6 +379,10 @@
=20
 	retval[0] =3D attach_va;
 	return 0;
+
+error:
+	(*uobj->pgops->pgo_detach)(uobj);
+	return error;
 }
=20
 int
@@ -406,12 +417,18 @@
 shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
 {
 	kauth_cred_t cred =3D l->l_cred;
+	struct proc *p =3D l->l_proc;
 	struct shmid_ds *shmseg;
+	struct shmmap_entry *shmmap_se;
+	struct shmmap_state *shmmap_s;
+	boolean_t state;
+	size_t size;
 	int error =3D 0;
=20
 	shmseg =3D shm_find_segment_by_shmid(shmid);
 	if (shmseg =3D=3D NULL)
 		return EINVAL;
+
 	switch (cmd) {
 	case IPC_STAT:
 		if ((error =3D ipcperm(cred, &shmseg->shm_perm, IPC_R)) !=3D 0)
@@ -440,6 +457,24 @@
 		break;
 	case SHM_LOCK:
 	case SHM_UNLOCK:
+		if ((error =3D ipcperm(cred, &shmseg->shm_perm, IPC_M)) !=3D 0)
+			 return error;
+		shmmap_s =3D shmmap_getprivate(p);
+		/* Find our shared memory address by shmid */
+		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
+			if (shmmap_se->shmid =3D=3D shmid) {
+				if (cmd =3D=3D SHM_LOCK)
+					state =3D FALSE;	/* Lock */
+				else
+					state =3D TRUE;	/* Unlock */
+				size =3D (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
+				error =3D uvm_map_pageable(&p->p_vmspace->vm_map,
+						shmmap_se->va, shmmap_se->va + size, state, 0);
+				if (error)
+					return error;
+			}
+		}
+		break;
 	default:
 		return EINVAL;
 	}
@@ -487,6 +522,10 @@
 	struct shmid_ds *shmseg;
 	int error =3D 0;
=20
+	/* Reallocate a memory space if needed */
+	if (shm_curmni !=3D shminfo.shmmni)
+		shmrealloc();
+
 	if (SCARG(uap, size) < shminfo.shmmin ||
 	    SCARG(uap, size) > shminfo.shmmax)
 		return EINVAL;
@@ -632,6 +671,43 @@
 }
=20
 void
+shmrealloc(void)
+{
+	struct shmid_ds *newshmsegs;
+	size_t oldsz, newsz;
+	vaddr_t v;
+	int i, ncopy;
+
+	/*
+	 * Resize shmsegs[] from shminfo.shmmni to shm_curmni slots.  Used
+	 * slots may sit anywhere in the table, so keep each one at its index
+	 * and refuse to shrink past a slot that is still allocated.
+	 */
+	for (i =3D shminfo.shmmni - 1; i >=3D shm_curmni; i--)
+		if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) =3D=3D 0)
+			return;
+
+	oldsz =3D round_page(shminfo.shmmni * sizeof(struct shmid_ds));
+	newsz =3D round_page(shm_curmni * sizeof(struct shmid_ds));
+	v =3D uvm_km_alloc(kernel_map, newsz, 0, UVM_KMF_WIRED);
+	if (v =3D=3D 0)
+		return;
+	newshmsegs =3D (void *)v;
+	/* XXX: Need locking? */
+	ncopy =3D shm_curmni < shminfo.shmmni ? shm_curmni : shminfo.shmmni;
+	bcopy(shmsegs, newshmsegs, ncopy * sizeof(newshmsegs[0]));
+	for (i =3D ncopy; i < shm_curmni; i++) {
+		newshmsegs[i].shm_perm.mode =3D SHMSEG_FREE;
+		newshmsegs[i].shm_perm._seq =3D 0;
+	}
+	if (shm_last_free >=3D shm_curmni)
+		shm_last_free =3D -1;	/* drop out-of-range hint */
+	uvm_km_free(kernel_map, (vaddr_t)shmsegs, oldsz, UVM_KMF_WIRED);
+	shmsegs =3D newshmsegs;
+	shminfo.shmmni =3D shm_curmni;
+}
+
+void
 shminit(void)
 {
 	int i, sz;
@@ -653,4 +729,100 @@
 	shm_last_free =3D 0;
 	shm_nused =3D 0;
 	shm_committed =3D 0;
+	shm_curmni =3D shminfo.shmmni;
+}
+
+static int
+sysctl_ipc_shmmni(SYSCTLFN_ARGS)
+{
+	int newsize, error;
+	struct sysctlnode node;
+	node =3D *rnode;
+	node.sysctl_data =3D &newsize;
+	newsize =3D shm_curmni;
+	error =3D sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error || newp =3D=3D NULL)
+		return (error);
+
+	/* XXX: Need some limits! */
+	if (newsize < 1 /*|| newsize > SHMNI_MAX*/)
+		return (EINVAL);
+
+	shm_curmni =3D newsize;
+
+	return 0;
 }
+
+static int
+sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS)
+{
+	int newsize, error;
+	struct sysctlnode node;
+	node =3D *rnode;
+	node.sysctl_data =3D &newsize;
+	newsize =3D shminfo.shmall;
+	error =3D sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error || newp =3D=3D NULL)
+		return (error);
+
+	/* XXX: Need some upper limit! */
+	if (newsize < 1 /*|| newsize > SHMMAXPGS_MAX*/)
+		return (EINVAL);
+
+	shminfo.shmall =3D newsize;
+	shminfo.shmmax =3D shminfo.shmall * PAGE_SIZE;
+
+	return 0;
+}
+
+SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
+{
+	sysctl_createv(clog, 0, NULL, NULL,
+		CTLFLAG_PERMANENT,
+		CTLTYPE_NODE, "kern", NULL,
+		NULL, 0, NULL, 0,
+		CTL_KERN, CTL_EOL);
+
+	sysctl_createv(clog, 0, NULL, NULL,
+		CTLFLAG_PERMANENT,
+		CTLTYPE_NODE, "ipc",
+		SYSCTL_DESCR("SysV IPC options"),
+		NULL, 0, NULL, 0,
+		CTL_KERN, KERN_SYSVIPC, CTL_EOL);
+
+	sysctl_createv(clog, 0, NULL, NULL,
+		CTLFLAG_PERMANENT | CTLFLAG_READONLY,
+		CTLTYPE_INT, "shmmax",
+		SYSCTL_DESCR("Max shared memory segment size in bytes"),
+		NULL, 0, &shminfo.shmmax, 0,
+		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
+
+	sysctl_createv(clog, 0, NULL, NULL,
+		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
+		CTLTYPE_INT, "shmmni",
+		SYSCTL_DESCR("Max number of shared memory identifiers"),
+		sysctl_ipc_shmmni, 0, &shm_curmni, 0,
+		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
+
+	sysctl_createv(clog, 0, NULL, NULL,
+		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
+		CTLTYPE_INT, "shmseg",
+		SYSCTL_DESCR("Max shared memory segments per process"),
+		NULL, 0, &shminfo.shmseg, 0,
+		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
+
+	sysctl_createv(clog, 0, NULL, NULL,
+		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
+		CTLTYPE_INT, "shmmaxpgs",
+		SYSCTL_DESCR("Max amount of shared memory in pages"),
+		sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
+		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
+
+	sysctl_createv(clog, 0, NULL, NULL,
+		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
+		CTLTYPE_INT, "shm_use_phys",
+		SYSCTL_DESCR("Use only physical memory for shared memory"),
+		NULL, 0, &shm_use_phys, 0,
+		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
+}
+
Index: sys/shm.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/sys/sys/shm.h,v
retrieving revision 1.41
diff -u -r1.41 shm.h
--- sys/shm.h	23 Jul 2006 22:06:14 -0000	1.41
+++ sys/shm.h	23 Sep 2006 21:24:39 -0000
@@ -123,7 +123,6 @@
 #if defined(_NETBSD_SOURCE)
 /*
  * Some systems (e.g. HP-UX) take these as the second (cmd) arg to shmctl(=
).
- * XXX Currently not implemented.
  */
 #define	SHM_LOCK	3	/* Lock segment in memory. */
 #define	SHM_UNLOCK	4	/* Unlock a segment locked by SHM_LOCK. */
@@ -181,6 +180,7 @@
 struct vmspace;
=20
 void	shminit(void);
+void	shmrealloc(void);
 void	shmfork(struct vmspace *, struct vmspace *);
 void	shmexit(struct vmspace *);
 int	shmctl1(struct lwp *, int, int, struct shmid_ds *);
Index: sys/sysctl.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/sys/sys/sysctl.h,v
retrieving revision 1.155
diff -u -r1.155 sysctl.h
--- sys/sysctl.h	30 Jul 2006 17:38:19 -0000	1.155
+++ sys/sysctl.h	23 Sep 2006 21:24:40 -0000
@@ -241,7 +241,7 @@
 #define	KERN_FSCALE		49	/* int: fixpt FSCALE */
 #define	KERN_CCPU		50	/* int: fixpt ccpu */
 #define	KERN_CP_TIME		51	/* struct: CPU time counters */
-#define	KERN_SYSVIPC_INFO	52	/* number of valid kern ids */
+#define	KERN_SYSVIPC		52	/* node: SysV IPC parameters */
 #define	KERN_MSGBUF		53	/* kernel message buffer */
 #define	KERN_CONSDEV		54	/* dev_t: console terminal device */
 #define	KERN_MAXPTYS		55	/* int: maximum number of ptys */
@@ -275,7 +275,6 @@
 #define	KERN_HARDCLOCK_TICKS	80	/* int: number of hardclock ticks */
 #define	KERN_MAXID		81	/* number of valid kern ids */
=20
-
 #define	CTL_KERN_NAMES { \
 	{ 0, 0 }, \
 	{ "ostype", CTLTYPE_STRING }, \
@@ -596,6 +595,16 @@
 #define	KERN_PROC_NENV		4	/* number of strings in above */
=20
 /*
+ * KERN_SYSVIPC subtypes
+ */
+#define	KERN_SYSVIPC_INFO		1	/* struct: SysV IPC information */
+#define	KERN_SYSVIPC_SHMMAX		2	/* max shared memory segment size (bytes) */
+#define	KERN_SYSVIPC_SHMMNI		3	/* max number of shared memory identifiers =
*/
+#define	KERN_SYSVIPC_SHMSEG		4	/* max shared memory segments per process */
+#define	KERN_SYSVIPC_SHMMAXPGS		5	/* max amount of shared memory (pages) */
+#define	KERN_SYSVIPC_SHMUSEPHYS		6	/* physical memory usage */
+
+/*
  * KERN_SYSVIPC_INFO subtypes
  */
 #define	KERN_SYSVIPC_MSG_INFO		1	/* msginfo and msqid_ds */

--Multipart=_Sun__24_Sep_2006_01_31_54_+0300_scRGQQD4CNIi=gsy--