Subject: Re: sysv shm eating up kernel VM
To: Jason Thorpe <thorpej@nas.nasa.gov>
From: Matthias Drochner <drochner@zelux6.zel.kfa-juelich.de>
List: tech-kern
Date: 09/09/1997 12:24:37
Excerpts from netbsd: 5-Sep-97 Re: sysv shm eating up kern.. Jason
Thorpe@nas.nasa.gov (700)

>  > Would it work to borrow from FreeBSD code (which
>  > doesn't map shm into kernel space)?
> 
> I think it might be worth pursuing something like they have done, yes.

I've started with it. The FreeBSD code is different in some details; would somebody
who really understands VM look at whether this leads in the right direction?
(It works for the tests I tried.)

best regards
Matthias

Index: sysv_shm.c
===================================================================
RCS file: /cvsroot/src/sys/kern/sysv_shm.c,v
retrieving revision 1.38
diff -c -2 -r1.38 sysv_shm.c
*** sysv_shm.c	1996/09/01 22:53:06	1.38
--- sysv_shm.c	1997/09/09 10:14:20
***************
*** 76,80 ****
  
  struct shm_handle {
! 	vm_offset_t kva;
  };
  
--- 76,80 ----
  
  struct shm_handle {
! 	vm_object_t shm_object;
  };
  
***************
*** 132,136 ****
  	shm_handle = shmseg->shm_internal;
  	size = (shmseg->shm_segsz + CLOFSET) & ~CLOFSET;
! 	vm_deallocate(sysvshm_map, shm_handle->kva, size);
  	free((caddr_t)shm_handle, M_SHM);
  	shmseg->shm_internal = NULL;
--- 132,136 ----
  	shm_handle = shmseg->shm_internal;
  	size = (shmseg->shm_segsz + CLOFSET) & ~CLOFSET;
! 	vm_object_deallocate(shm_handle->shm_object);
  	free((caddr_t)shm_handle, M_SHM);
  	shmseg->shm_internal = NULL;
***************
*** 152,156 ****
  	shmseg = &shmsegs[segnum];
  	size = (shmseg->shm_segsz + CLOFSET) & ~CLOFSET;
! 	result = vm_deallocate(&p->p_vmspace->vm_map, shmmap_s->va, size);
  	if (result != KERN_SUCCESS)
  		return EINVAL;
--- 152,156 ----
  	shmseg = &shmsegs[segnum];
  	size = (shmseg->shm_segsz + CLOFSET) & ~CLOFSET;
! 	result = vm_map_remove(&p->p_vmspace->vm_map, shmmap_s->va,
shmmap_s->va + size);
  	if (result != KERN_SUCCESS)
  		return EINVAL;
***************
*** 205,211 ****
--- 205,213 ----
  	struct shmid_ds *shmseg;
  	struct shmmap_state *shmmap_s = NULL;
+ 	struct shm_handle *shm_handle;
  	vm_offset_t attach_va;
  	vm_prot_t prot;
  	vm_size_t size;
+ 	int rv;
  
  	shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
***************
*** 250,257 ****
  		    round_page(p->p_vmspace->vm_taddr + MAXTSIZ + MAXDSIZ);
  	}
! 	error = vm_mmap(&p->p_vmspace->vm_map, &attach_va, size, prot,
! 	    VM_PROT_DEFAULT, flags, (caddr_t)(long)SCARG(uap, shmid), 0);
! 	if (error)
! 		return error;
  	shmmap_s->va = attach_va;
  	shmmap_s->shmid = SCARG(uap, shmid);
--- 252,267 ----
  		    round_page(p->p_vmspace->vm_taddr + MAXTSIZ + MAXDSIZ);
  	}
! 	shm_handle = shmseg->shm_internal;
! 	vm_object_reference(shm_handle->shm_object);
! 	rv = vm_map_find(&p->p_vmspace->vm_map, shm_handle->shm_object,
! 		0, &attach_va, size, (flags & MAP_FIXED)?0:1);
! 	if (rv != KERN_SUCCESS) {
! 		return ENOMEM;
! 	}
! 	vm_map_protect(&p->p_vmspace->vm_map, attach_va, attach_va + size,
! 		       prot, 0);
! 	vm_map_inherit(&p->p_vmspace->vm_map,
! 		attach_va, attach_va + size, VM_INHERIT_SHARE);
! 
  	shmmap_s->va = attach_va;
  	shmmap_s->shmid = SCARG(uap, shmid);
***************
*** 374,381 ****
  	register_t *retval;
  {
! 	int i, segnum, result, shmid, size;
  	struct ucred *cred = p->p_ucred;
  	struct shmid_ds *shmseg;
  	struct shm_handle *shm_handle;
  	
  	if (SCARG(uap, size) < shminfo.shmmin ||
--- 384,392 ----
  	register_t *retval;
  {
! 	int i, segnum, shmid, size;
  	struct ucred *cred = p->p_ucred;
  	struct shmid_ds *shmseg;
  	struct shm_handle *shm_handle;
+ 	vm_pager_t pager;
  	
  	if (SCARG(uap, size) < shminfo.shmmin ||
***************
*** 409,422 ****
  	    malloc(sizeof(struct shm_handle), M_SHM, M_WAITOK);
  	shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
! 	result = vm_mmap(sysvshm_map, &shm_handle->kva, size, VM_PROT_ALL,
! 	    VM_PROT_DEFAULT, MAP_ANON, (caddr_t)(long)shmid, 0);
! 	if (result != KERN_SUCCESS) {
! 		shmseg->shm_perm.mode = SHMSEG_FREE;
! 		shm_last_free = segnum;
! 		free((caddr_t)shm_handle, M_SHM);
! 		/* Just in case. */
! 		wakeup((caddr_t)shmseg);
  		return ENOMEM;
! 	}
  	shmseg->shm_internal = shm_handle;
  	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
--- 420,435 ----
  	    malloc(sizeof(struct shm_handle), M_SHM, M_WAITOK);
  	shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
! 	
! 	shm_handle->shm_object = vm_object_allocate(size);
! 	if (shm_handle->shm_object == NULL)
  		return ENOMEM;
! 	/*
! 	 * We make sure that we have allocated a pager before we need
! 	 * to.
! 	 */
! 	pager = vm_pager_allocate(PG_DFLT, 0, size, VM_PROT_DEFAULT, 0);
! 	if (pager == NULL)
! 		return ENOMEM;
! 	vm_object_setpager(shm_handle->shm_object, pager, 0, 0);
  	shmseg->shm_internal = shm_handle;
  	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
***************
*** 515,525 ****
  {
  	int i;
- 	vm_offset_t garbage1, garbage2;
  
  	shminfo.shmmax *= NBPG;
  
- 	/* actually this *should* be pageable.  SHM_{LOCK,UNLOCK} */
- 	sysvshm_map = kmem_suballoc(kernel_map, &garbage1, &garbage2,
- 				    shminfo.shmall * NBPG, TRUE);
  	for (i = 0; i < shminfo.shmmni; i++) {
  		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
--- 528,534 ----