Subject: SysV SHM locking and reallocating support
To: None <tech-kern@netbsd.org>
From: Mindaugas <unex@linija.org>
List: tech-kern
Date: 09/24/2006 01:31:54
This is a multi-part message in MIME format.
--Multipart=_Sun__24_Sep_2006_01_31_54_+0300_scRGQQD4CNIi=gsy
Content-Type: text/plain; charset=US-ASCII
Content-Transfer-Encoding: 7bit
Hello,
here is an attached patch to support locking SysV IPC shared memory into
physical memory via the shmctl(2) SHM_LOCK/SHM_UNLOCK options. These
options are not defined in the standard, but exist in systems like
Solaris, Linux, HP-UX and probably others. There is also a sysctl
parameter, kern.ipc.shm_use_phys, inspired by FreeBSD.
For memory locking I have used uvm_map_pageable(), but I am not sure
whether this is OK. It needs testing. Please review the patch and
comment on it.
There is also an implementation of changing shmmni, shmseg and shmmaxpgs
via sysctl nodes. At least as far as I have tested, it works fine.
There is an XXX comment in shmrealloc() where, I think, there should be
interrupt locking. I am also not sure what locking is needed when the
sysctl parameters are changed.
Another question - do we need to set some upper limits (calculated from
RAM?) for the shmmaxpgs and shmmni parameters (marked with XXX comments
in the sysctl handler functions)?
In the attached patch, there is a new kern.ipc node for the mentioned
SHM parameters. In my opinion, it would be structurally better to move
all SysV IPC parameters to the kern.ipc node, including the
kern.sysvmsg, kern.sysvsem and kern.sysvshm parameters. In the patch I
experimentally moved kern.sysvipc_info to kern.ipc.sysvipc_info (this
needs only a few changes in ipcs and emulation). And wouldn't it be
better to move the initialization of these parameters from
init_sysctl.c to sysv_ipc.c?
What are your thoughts on these structural sysctl changes?
Finally, is there anything fundamentally wrong with this patch and these
changes? If not, I would like to finish it.
Thanks.
P.S. Sorry if my questions come from poor knowledge of NetBSD internals.
--
Best regards,
Mindaugas
--Multipart=_Sun__24_Sep_2006_01_31_54_+0300_scRGQQD4CNIi=gsy
Content-Type: application/octet-stream;
name="sysv.diff"
Content-Disposition: attachment;
filename="sysv.diff"
Content-Transfer-Encoding: quoted-printable
Index: kern/init_sysctl.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/sys/kern/init_sysctl.c,v
retrieving revision 1.81
diff -u -r1.81 init_sysctl.c
--- kern/init_sysctl.c 26 Jul 2006 09:33:57 -0000 1.81
+++ kern/init_sysctl.c 23 Sep 2006 21:24:38 -0000
@@ -598,10 +598,16 @@
#if defined(SYSVMSG) || defined(SYSVSEM) || defined(SYSVSHM)
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
+ CTLTYPE_NODE, "ipc",
+ SYSCTL_DESCR("SysV IPC options"),
+ NULL, 0, NULL, 0,
+ CTL_KERN, KERN_SYSVIPC, CTL_EOL);
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT,
CTLTYPE_STRUCT, "sysvipc_info",
SYSCTL_DESCR("System V style IPC information"),
sysctl_kern_sysvipc, 0, NULL, 0,
- CTL_KERN, KERN_SYSVIPC_INFO, CTL_EOL);
+ CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_INFO, CTL_EOL);
#endif /* SYSVMSG || SYSVSEM || SYSVSHM */
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT,
Index: kern/sysv_shm.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/sys/kern/sysv_shm.c,v
retrieving revision 1.89
diff -u -r1.89 sysv_shm.c
--- kern/sysv_shm.c 23 Jul 2006 22:06:11 -0000 1.89
+++ kern/sysv_shm.c 23 Sep 2006 21:24:39 -0000
@@ -112,7 +112,7 @@
int shmid;
};
=20
-static int shm_last_free, shm_committed;
+static int shm_last_free, shm_committed, shm_curmni, shm_use_phys =3D 0;
=20
static POOL_INIT(shmmap_entry_pool, sizeof(struct shmmap_entry), 0, 0, 0,
"shmmp", &pool_allocator_nointr);
@@ -353,10 +353,17 @@
error =3D uvm_map(&p->p_vmspace->vm_map, &attach_va, size,
uobj, 0, 0,
UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, 0));
- if (error) {
- (*uobj->pgops->pgo_detach)(uobj);
- return error;
+ if (error)
+ goto error;
+
+ /* Lock the memory */
+ if (shm_use_phys) {
+ error =3D uvm_map_pageable(&p->p_vmspace->vm_map, attach_va,
+ attach_va + size, FALSE, 0);
+ if (error)
+ goto error;
}
+
shmmap_se =3D pool_get(&shmmap_entry_pool, PR_WAITOK);
shmmap_se->va =3D attach_va;
shmmap_se->shmid =3D SCARG(uap, shmid);
@@ -372,6 +379,10 @@
=20
retval[0] =3D attach_va;
return 0;
+
+error:
+ (*uobj->pgops->pgo_detach)(uobj);
+ return error;
}
=20
int
@@ -406,12 +417,18 @@
shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
{
kauth_cred_t cred =3D l->l_cred;
+ struct proc *p =3D l->l_proc;
struct shmid_ds *shmseg;
+ struct shmmap_entry *shmmap_se;
+ struct shmmap_state *shmmap_s;
+ boolean_t state;
+ size_t size;
int error =3D 0;
=20
shmseg =3D shm_find_segment_by_shmid(shmid);
if (shmseg =3D=3D NULL)
return EINVAL;
+
switch (cmd) {
case IPC_STAT:
if ((error =3D ipcperm(cred, &shmseg->shm_perm, IPC_R)) !=3D 0)
@@ -440,6 +457,24 @@
break;
case SHM_LOCK:
case SHM_UNLOCK:
+ if ((error =3D ipcperm(cred, &shmseg->shm_perm, IPC_M)) !=3D 0)
+ return error;
+ shmmap_s =3D shmmap_getprivate(p);
+ /* Find our shared memory address by shmid */
+ SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
+ if (shmmap_se->shmid =3D=3D shmid) {
+ if (cmd =3D=3D SHM_LOCK)
+ state =3D FALSE; /* Lock */
+ else
+ state =3D TRUE; /* Unlock */
+ size =3D (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
+ error =3D uvm_map_pageable(&p->p_vmspace->vm_map,
+ shmmap_se->va, shmmap_se->va + size, state, 0);
+ if (error)
+ return error;
+ }
+ }
+ break;
default:
return EINVAL;
}
@@ -487,6 +522,10 @@
struct shmid_ds *shmseg;
int error =3D 0;
=20
+ /* Reallocate a memory space if needed */
+ if (shm_curmni !=3D shminfo.shmmni)
+ shmrealloc();
+
if (SCARG(uap, size) < shminfo.shmmin ||
SCARG(uap, size) > shminfo.shmmax)
return EINVAL;
@@ -632,6 +671,43 @@
}
=20
void
+shmrealloc()
+{
+ int i, sz;
+ vaddr_t v;
+ struct shmid_ds *newshmsegs;
+
+ /* We can't reallocate lesser memory than we use */
+ if (shm_nused > shm_curmni)
+ return;
+
+ /* Allocate new memory area */
+ sz =3D shm_curmni * sizeof(struct shmid_ds);
+ v =3D uvm_km_alloc(kernel_map, round_page(sz), 0, UVM_KMF_WIRED);
+ if (v =3D=3D 0)
+ return;
+
+ newshmsegs =3D (void *)v;
+
+ /* XXX: Need locking? */
+
+ /* Copy all memory to the new area */
+ for (i =3D 0; i < shm_nused; i++)
+ bcopy(&shmsegs[i], &newshmsegs[i], sizeof(newshmsegs[0]));
+
+ /* Mark as free all new segments, if there is any */
+ for (; i < shm_curmni; i++) {
+ newshmsegs[i].shm_perm.mode =3D SHMSEG_FREE;
+ newshmsegs[i].shm_perm._seq =3D 0;
+ }
+
+ sz =3D shminfo.shmmni * sizeof(struct shmid_ds);
+ uvm_km_free(kernel_map, (vaddr_t)shmsegs, sz, UVM_KMF_WIRED);
+ shmsegs =3D newshmsegs;
+ shminfo.shmmni =3D shm_curmni;
+}
+
+void
shminit(void)
{
int i, sz;
@@ -653,4 +729,100 @@
shm_last_free =3D 0;
shm_nused =3D 0;
shm_committed =3D 0;
+ shm_curmni =3D shminfo.shmmni;
+}
+
+static int
+sysctl_ipc_shmmni(SYSCTLFN_ARGS)
+{
+ int newsize, error;
+ struct sysctlnode node;
+ node =3D *rnode;
+ node.sysctl_data =3D &newsize;
+ newsize =3D shm_curmni;
+ error =3D sysctl_lookup(SYSCTLFN_CALL(&node));
+ if (error || newp =3D=3D NULL)
+ return (error);
+
+ /* XXX: Need some limits! */
+ if (newsize < 1 /*|| newsize > SHMNI_MAX*/)
+ return (EINVAL);
+
+ shm_curmni =3D newsize;
+
+ return 0;
}
+
+static int
+sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS)
+{
+ int newsize, error;
+ struct sysctlnode node;
+ node =3D *rnode;
+ node.sysctl_data =3D &newsize;
+ newsize =3D shminfo.shmall;
+ error =3D sysctl_lookup(SYSCTLFN_CALL(&node));
+ if (error || newp =3D=3D NULL)
+ return (error);
+
+ /* XXX: Need some upper limit! */
+ if (newsize < 1 /*|| newsize > SHMMAXPGS_MAX*/)
+ return (EINVAL);
+
+ shminfo.shmall =3D newsize;
+ shminfo.shmmax =3D shminfo.shmall * PAGE_SIZE;
+
+ return 0;
+}
+
+SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
+{
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT,
+ CTLTYPE_NODE, "kern", NULL,
+ NULL, 0, NULL, 0,
+ CTL_KERN, CTL_EOL);
+
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT,
+ CTLTYPE_NODE, "ipc",
+ SYSCTL_DESCR("SysV IPC options"),
+ NULL, 0, NULL, 0,
+ CTL_KERN, KERN_SYSVIPC, CTL_EOL);
+
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT | CTLFLAG_READONLY,
+ CTLTYPE_INT, "shmmax",
+ SYSCTL_DESCR("Max shared memory segment size in bytes"),
+ NULL, 0, &shminfo.shmmax, 0,
+ CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
+
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
+ CTLTYPE_INT, "shmmni",
+ SYSCTL_DESCR("Max number of shared memory identifiers"),
+ sysctl_ipc_shmmni, 0, &shm_curmni, 0,
+ CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
+
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
+ CTLTYPE_INT, "shmseg",
+ SYSCTL_DESCR("Max shared memory segments per process"),
+ NULL, 0, &shminfo.shmseg, 0,
+ CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
+
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
+ CTLTYPE_INT, "shmmaxpgs",
+ SYSCTL_DESCR("Max amount of shared memory in pages"),
+ sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
+ CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
+
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
+ CTLTYPE_INT, "shm_use_phys",
+ SYSCTL_DESCR("Use only physical memory for shared memory"),
+ NULL, 0, &shm_use_phys, 0,
+ CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
+}
+
Index: sys/shm.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/sys/sys/shm.h,v
retrieving revision 1.41
diff -u -r1.41 shm.h
--- sys/shm.h 23 Jul 2006 22:06:14 -0000 1.41
+++ sys/shm.h 23 Sep 2006 21:24:39 -0000
@@ -123,7 +123,6 @@
#if defined(_NETBSD_SOURCE)
/*
* Some systems (e.g. HP-UX) take these as the second (cmd) arg to shmctl(=
).
- * XXX Currently not implemented.
*/
#define SHM_LOCK 3 /* Lock segment in memory. */
#define SHM_UNLOCK 4 /* Unlock a segment locked by SHM_LOCK. */
@@ -181,6 +180,7 @@
struct vmspace;
=20
void shminit(void);
+void shmrealloc(void);
void shmfork(struct vmspace *, struct vmspace *);
void shmexit(struct vmspace *);
int shmctl1(struct lwp *, int, int, struct shmid_ds *);
Index: sys/sysctl.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
RCS file: /cvsroot/src/sys/sys/sysctl.h,v
retrieving revision 1.155
diff -u -r1.155 sysctl.h
--- sys/sysctl.h 30 Jul 2006 17:38:19 -0000 1.155
+++ sys/sysctl.h 23 Sep 2006 21:24:40 -0000
@@ -241,7 +241,7 @@
#define KERN_FSCALE 49 /* int: fixpt FSCALE */
#define KERN_CCPU 50 /* int: fixpt ccpu */
#define KERN_CP_TIME 51 /* struct: CPU time counters */
-#define KERN_SYSVIPC_INFO 52 /* number of valid kern ids */
+#define KERN_SYSVIPC 52 /* node: SysV IPC parameters */
#define KERN_MSGBUF 53 /* kernel message buffer */
#define KERN_CONSDEV 54 /* dev_t: console terminal device */
#define KERN_MAXPTYS 55 /* int: maximum number of ptys */
@@ -275,7 +275,6 @@
#define KERN_HARDCLOCK_TICKS 80 /* int: number of hardclock ticks */
#define KERN_MAXID 81 /* number of valid kern ids */
=20
-
#define CTL_KERN_NAMES { \
{ 0, 0 }, \
{ "ostype", CTLTYPE_STRING }, \
@@ -596,6 +595,16 @@
#define KERN_PROC_NENV 4 /* number of strings in above */
=20
/*
+ * KERN_SYSVIPC subtypes
+ */
+#define KERN_SYSVIPC_INFO 1 /* number of valid kern ids */
+#define KERN_SYSVIPC_SHMMAX 2 /* max shared memory segment size (bytes) */
+#define KERN_SYSVIPC_SHMMNI 3 /* max number of shared memory identifiers =
*/
+#define KERN_SYSVIPC_SHMSEG 4 /* max shared memory segments per process */
+#define KERN_SYSVIPC_SHMMAXPGS 5 /* max amount of shared memory (pages) */
+#define KERN_SYSVIPC_SHMUSEPHYS 6 /* physical memory usage */
+
+/*
* KERN_SYSVIPC_INFO subtypes
*/
#define KERN_SYSVIPC_MSG_INFO 1 /* msginfo and msqid_ds */
--Multipart=_Sun__24_Sep_2006_01_31_54_+0300_scRGQQD4CNIi=gsy--