Subject: vm_mmap bug
To: None <core@NetBSD.ORG>
From: Gordon W. Ross <gwr@jericho.mc.com>
List: tech-kern
Date: 11/29/1994 12:47:08
While debugging what turned out to be unitialized syscall args,
I discovered that mmap() does NOT verify that the file position
is page-aligned as it should.  I added a check to vm_mmap(),
though maybe the check belongs in its callers.  I'm not sure.
For now this (important) check is in both places...

Is it not correct that mmap should demand page-aligned
values for the file offset (and address if MAP_FIXED)?

I've successfully tested changes to add these checks, but
noticed that it exposed (at least) one broken user program
that depended on the liberal treatment of the file offset.
The libc function nlist does an mmap to get the string table
from an object, but did not page-align the file offset.
This caused savecore to break (and I'm not sure what else).

I have a fix for libc/gen/nlist.c that I'll send to jtc.

Attached are my changes to vm_mmap.c to add checking for
page-alignment of the file offset. 

Please look this over and let me know if it looks OK to you.

Oh yes, several functions in vm_mmap.c were using SCARG(...)
all over the place instead of just using the locals, so I
cleaned that up a bit.

Gordon


*** src/sys/vm/vm_mmap.c.orig	Mon Nov 21 17:49:38 1994
--- src/sys/vm/vm_mmap.c	Tue Nov 29 12:27:01 1994
***************
*** 1,4 ****
! /*	$NetBSD: vm_mmap.c,v 1.34 1994/10/29 07:35:14 cgd Exp $	*/
  
  /*
   * Copyright (c) 1988 University of Utah.
--- 1,4 ----
! /*	$NetBSD$	*/
  
  /*
   * Copyright (c) 1988 University of Utah.
***************
*** 196,227 ****
  	vm_size_t size;
  	vm_prot_t prot, maxprot;
  	caddr_t handle;
! 	int flags, error;
  
  	prot = SCARG(uap, prot) & VM_PROT_ALL;
  	flags = SCARG(uap, flags);
! 	pos = SCARG(uap, pos);
  #ifdef DEBUG
  	if (mmapdebug & MDB_FOLLOW)
  		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
! 		       p->p_pid, SCARG(uap, addr), SCARG(uap, len), prot,
! 		       flags, SCARG(uap, fd), pos);
  #endif
! 	/*
! 	 * Address (if FIXED) must be page aligned.
! 	 * Size is implicitly rounded to a page boundary.
! 	 */
! 	addr = (vm_offset_t) SCARG(uap, addr);
! 	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
! 	    (ssize_t)SCARG(uap, len) < 0 ||
! 	    ((flags & MAP_ANON) && SCARG(uap, fd) != -1))
  		return (EINVAL);
! 	size = (vm_size_t) round_page(SCARG(uap, len));
  	/*
  	 * Check for illegal addresses.  Watch out for address wrap...
  	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
  	 */
  	if (flags & MAP_FIXED) {
  		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
  			return (EINVAL);
  		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
--- 196,234 ----
  	vm_size_t size;
  	vm_prot_t prot, maxprot;
  	caddr_t handle;
! 	int fd, flags, error;
  
+ 	addr = (vm_offset_t) SCARG(uap, addr);
+ 	size = (vm_size_t) SCARG(uap, len);
  	prot = SCARG(uap, prot) & VM_PROT_ALL;
  	flags = SCARG(uap, flags);
! 	fd = SCARG(uap, fd);
! 	pos = (vm_offset_t) SCARG(uap, pos);
! 
  #ifdef DEBUG
  	if (mmapdebug & MDB_FOLLOW)
  		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
! 		       p->p_pid, addr, size, prot, flags, fd, pos);
  #endif
! 
! 	/* No single mapping can be larger than... */
! 	if ((ssize_t)size < 0)
  		return (EINVAL);
! 	size = (vm_size_t) round_page(size);
! 
! 	/* File position must always be page-aligned. */
! 	if (pos & PAGE_MASK)
! 		return (EINVAL);
! 
  	/*
  	 * Check for illegal addresses.  Watch out for address wrap...
  	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
  	 */
  	if (flags & MAP_FIXED) {
+ 		/* Specified address must be page aligned. */
+ 		if (addr & PAGE_MASK)
+ 			return (EINVAL);
+ 		/* Address range must be all in user VM space. */
  		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
  			return (EINVAL);
  		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
***************
*** 239,258 ****
  	 */
  	else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
  		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
! 	if (flags & MAP_ANON) {
  		/*
- 		 * Mapping blank space is trivial.
- 		 */
- 		handle = NULL;
- 		maxprot = VM_PROT_ALL;
- 		pos = 0;
- 	} else {
- 		/*
  		 * Mapping file, get fp for validation.
  		 * Obtain vnode and make sure it is of appropriate type.
  		 */
! 		if (((unsigned)SCARG(uap, fd)) >= fdp->fd_nfiles ||
! 		    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
  			return (EBADF);
  		if (fp->f_type != DTYPE_VNODE)
  			return (EINVAL);
--- 246,259 ----
  	 */
  	else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
  		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
! 
! 	if ((flags & MAP_ANON) == 0) {
  		/*
  		 * Mapping file, get fp for validation.
  		 * Obtain vnode and make sure it is of appropriate type.
  		 */
! 		if (((unsigned)fd) >= fdp->fd_nfiles ||
! 		    (fp = fdp->fd_ofiles[fd]) == NULL)
  			return (EBADF);
  		if (fp->f_type != DTYPE_VNODE)
  			return (EINVAL);
***************
*** 264,299 ****
  		 * memory (ala SunOS).
  		 */
  		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
- 			handle = NULL;
- 			maxprot = VM_PROT_ALL;
  			flags |= MAP_ANON;
! 		} else {
! 			/*
! 			 * Ensure that file and memory protections are
! 			 * compatible.  Note that we only worry about
! 			 * writability if mapping is shared; in this case,
! 			 * current and max prot are dictated by the open file.
! 			 * XXX use the vnode instead?  Problem is: what
! 			 * credentials do we use for determination?
! 			 * What if proc does a setuid?
! 			 */
! 			maxprot = VM_PROT_EXECUTE;	/* ??? */
! 			if (fp->f_flag & FREAD)
! 				maxprot |= VM_PROT_READ;
! 			else if (prot & PROT_READ)
! 				return (EACCES);
! 			if (flags & MAP_SHARED) {
! 				if (fp->f_flag & FWRITE)
! 					maxprot |= VM_PROT_WRITE;
! 				else if (prot & PROT_WRITE)
! 					return (EACCES);
! 			} else
! 				maxprot |= VM_PROT_WRITE;
! 			handle = (caddr_t)vp;
  		}
  	}
  	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
! 	    flags, handle, (vm_offset_t)SCARG(uap, pos));
  	if (error == 0)
  		*retval = (register_t)addr;
  	return (error);
--- 265,309 ----
  		 * memory (ala SunOS).
  		 */
  		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
  			flags |= MAP_ANON;
! 			goto is_anon;
  		}
+ 		/*
+ 		 * Ensure that file and memory protections are
+ 		 * compatible.  Note that we only worry about
+ 		 * writability if mapping is shared; in this case,
+ 		 * current and max prot are dictated by the open file.
+ 		 * XXX use the vnode instead?  Problem is: what
+ 		 * credentials do we use for determination?
+ 		 * What if proc does a setuid?
+ 		 */
+ 		maxprot = VM_PROT_EXECUTE;	/* ??? */
+ 		if (fp->f_flag & FREAD)
+ 			maxprot |= VM_PROT_READ;
+ 		else if (prot & PROT_READ)
+ 			return (EACCES);
+ 		if (flags & MAP_SHARED) {
+ 			if (fp->f_flag & FWRITE)
+ 				maxprot |= VM_PROT_WRITE;
+ 			else if (prot & PROT_WRITE)
+ 				return (EACCES);
+ 		} else
+ 			maxprot |= VM_PROT_WRITE;
+ 		handle = (caddr_t)vp;
+ 	} else {
+ 		/*
+ 		 * (flags & MAP_ANON) == TRUE
+ 		 * Mapping blank space is trivial.
+ 		 */
+ 		if (fd != -1)
+ 			return (EINVAL);
+ 	is_anon:
+ 		handle = NULL;
+ 		maxprot = VM_PROT_ALL;
+ 		pos = 0;
  	}
  	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
! 	    flags, handle, pos);
  	if (error == 0)
  		*retval = (register_t)addr;
  	return (error);
***************
*** 314,330 ****
  	int rv;
  	boolean_t syncio, invalidate;
  
  #ifdef DEBUG
  	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
  		printf("msync(%d): addr %x len %x\n",
! 		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
  #endif
! 	if (((long)SCARG(uap, addr) & PAGE_MASK) ||
! 	    SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
  		return (EINVAL);
  	map = &p->p_vmspace->vm_map;
- 	addr = (vm_offset_t)SCARG(uap, addr);
- 	size = (vm_size_t)SCARG(uap, len);
  	/*
  	 * XXX Gak!  If size is zero we are supposed to sync "all modified
  	 * pages with the region containing addr".  Unfortunately, we
--- 324,339 ----
  	int rv;
  	boolean_t syncio, invalidate;
  
+ 	addr = (vm_offset_t)SCARG(uap, addr);
+ 	size = (vm_size_t)SCARG(uap, len);
  #ifdef DEBUG
  	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
  		printf("msync(%d): addr %x len %x\n",
! 		       p->p_pid, addr, size);
  #endif
! 	if (((long)addr & PAGE_MASK) || addr + (int)size < addr)
  		return (EINVAL);
  	map = &p->p_vmspace->vm_map;
  	/*
  	 * XXX Gak!  If size is zero we are supposed to sync "all modified
  	 * pages with the region containing addr".  Unfortunately, we
***************
*** 390,405 ****
  	vm_size_t size;
  	vm_map_t map;
  
  #ifdef DEBUG
  	if (mmapdebug & MDB_FOLLOW)
  		printf("munmap(%d): addr %x len %x\n",
! 		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
  #endif
  
! 	addr = (vm_offset_t) SCARG(uap, addr);
! 	if ((addr & PAGE_MASK) || SCARG(uap, len) < 0)
  		return(EINVAL);
! 	size = (vm_size_t) round_page(SCARG(uap, len));
  	if (size == 0)
  		return(0);
  	/*
--- 399,415 ----
  	vm_size_t size;
  	vm_map_t map;
  
+ 	addr = (vm_offset_t) SCARG(uap, addr);
+ 	size = (vm_size_t) SCARG(uap, len);
  #ifdef DEBUG
  	if (mmapdebug & MDB_FOLLOW)
  		printf("munmap(%d): addr %x len %x\n",
! 		       p->p_pid, addr, size);
  #endif
  
! 	if ((addr & PAGE_MASK) || (int)size < 0)
  		return(EINVAL);
! 	size = (vm_size_t) round_page(size);
  	if (size == 0)
  		return(0);
  	/*
***************
*** 453,469 ****
  	vm_size_t size;
  	register vm_prot_t prot;
  
  #ifdef DEBUG
  	if (mmapdebug & MDB_FOLLOW)
  		printf("mprotect(%d): addr %x len %x prot %d\n", p->p_pid,
! 		    SCARG(uap, addr), SCARG(uap, len), SCARG(uap, prot));
  #endif
  
! 	addr = (vm_offset_t)SCARG(uap, addr);
! 	if ((addr & PAGE_MASK) || SCARG(uap, len) < 0)
  		return(EINVAL);
- 	size = (vm_size_t)SCARG(uap, len);
- 	prot = SCARG(uap, prot) & VM_PROT_ALL;
  
  	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
  	    FALSE)) {
--- 463,479 ----
  	vm_size_t size;
  	register vm_prot_t prot;
  
+ 	addr = (vm_offset_t)SCARG(uap, addr);
+ 	size = (vm_size_t)SCARG(uap, len);
+ 	prot = SCARG(uap, prot) & VM_PROT_ALL;
  #ifdef DEBUG
  	if (mmapdebug & MDB_FOLLOW)
  		printf("mprotect(%d): addr %x len %x prot %d\n", p->p_pid,
! 		    addr, size, prot);
  #endif
  
! 	if ((addr & PAGE_MASK) || (int)size < 0)
  		return(EINVAL);
  
  	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
  	    FALSE)) {
***************
*** 521,536 ****
  	int error;
  	extern int vm_page_max_wired;
  
  #ifdef DEBUG
  	if (mmapdebug & MDB_FOLLOW)
  		printf("mlock(%d): addr %x len %x\n",
! 		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
  #endif
! 	addr = (vm_offset_t)SCARG(uap, addr);
! 	if ((addr & PAGE_MASK) ||
! 	    SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
  		return (EINVAL);
! 	size = round_page((vm_size_t)SCARG(uap, len));
  	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
  		return (EAGAIN);
  #ifdef pmap_wired_count
--- 531,546 ----
  	int error;
  	extern int vm_page_max_wired;
  
+ 	addr = (vm_offset_t)SCARG(uap, addr);
+ 	size = (vm_size_t)SCARG(uap, len);
  #ifdef DEBUG
  	if (mmapdebug & MDB_FOLLOW)
  		printf("mlock(%d): addr %x len %x\n",
! 		       p->p_pid, addr, size);
  #endif
! 	if ((addr & PAGE_MASK) || addr + (int)size < addr)
  		return (EINVAL);
! 	size = (vm_size_t) round_page(size);
  	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
  		return (EAGAIN);
  #ifdef pmap_wired_count
***************
*** 559,578 ****
  	vm_size_t size;
  	int error;
  
  #ifdef DEBUG
  	if (mmapdebug & MDB_FOLLOW)
  		printf("munlock(%d): addr %x len %x\n",
! 		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
  #endif
! 	addr = (vm_offset_t)SCARG(uap, addr);
! 	if ((addr & PAGE_MASK) ||
! 	    SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
  		return (EINVAL);
  #ifndef pmap_wired_count
  	if (error = suser(p->p_ucred, &p->p_acflag))
  		return (error);
  #endif
! 	size = round_page((vm_size_t)SCARG(uap, len));
  
  	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
  	return (error == KERN_SUCCESS ? 0 : ENOMEM);
--- 569,588 ----
  	vm_size_t size;
  	int error;
  
+ 	addr = (vm_offset_t)SCARG(uap, addr);
+ 	size = (vm_size_t)SCARG(uap, len);
  #ifdef DEBUG
  	if (mmapdebug & MDB_FOLLOW)
  		printf("munlock(%d): addr %x len %x\n",
! 		       p->p_pid, addr, size);
  #endif
! 	if ((addr & PAGE_MASK) || addr + (int)size < addr)
  		return (EINVAL);
  #ifndef pmap_wired_count
  	if (error = suser(p->p_ucred, &p->p_acflag))
  		return (error);
  #endif
! 	size = (vm_size_t) round_page(size);
  
  	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
  	return (error == KERN_SUCCESS ? 0 : ENOMEM);
***************
*** 603,613 ****
--- 613,634 ----
  	if (size == 0)
  		return (0);
  
+ 	/* The file offset must be page aligned. */
+ 	if (foff & PAGE_MASK)
+ 		return (EINVAL);
+ 
  	if ((flags & MAP_FIXED) == 0) {
+ 		/* The address is just a hint */
  		fitit = TRUE;
  		*addr = round_page(*addr);
  	} else {
+ 		/*
+ 		 * Use the specified address exactly
+ 		 * (but check alignment first).
+ 		 */
  		fitit = FALSE;
+ 		if (*addr & PAGE_MASK)
+ 			return (EINVAL);
  		(void)vm_deallocate(map, *addr, size);
  	}