Subject: Decoupling MAXPHYS and MAXBSIZE (includes patch)
To: None <tech-kern@netbsd.org>
From: Thor Lancelot Simon <tls@rek.tjls.com>
List: tech-kern
Date: 04/20/2003 13:28:03
The patch below replaces what I believe to be uses of MAXBSIZE where
MAXPHYS would be more correct.  It's important for a few reasons:

1) It's a necessary first step towards per-device MAXPHYS.

2) It corrects the abstraction violation of using filesystem constants
   in device drivers.

3) It allows clustering of file data up to MAXPHYS on systems which
   need to reduce MAXBSIZE in order to get more metadata buffers (e.g.
   for directories on machines with very large numbers of directories,
   such as nbanoncvs).  Without this patch, such machines can be
   tuned to do small I/O efficiently, but they cannot both cluster
   larger I/O efficiently and cache all the directories needed to do
   small I/O to many directories at once.

4) It will allow increasing MAXPHYS on hardware that supports it (even
   without #1 above) without burning a huge amount of KVA by increasing
   MAXBSIZE.

I've tested this patch as follows:

1) With 64K MAXBSIZE and MAXPHYS (equivalent to the current default on
   i386).

2) With 16K MAXBSIZE and 64K MAXPHYS (like nbanoncvs): there is no
   filesystem corruption, and reads and writes are clustered to 64K.

3) With 16K MAXBSIZE and 63K MAXPHYS (like some ports that are limited
   to a 64K-1 transfer count): there is no filesystem corruption, and
   reads and writes are clustered to 32K.  I believe that these should
   be clustered to 48K (and would have been in the pre-UBC world,
   AFAICT), but a limitation in the clustering code seems to mean we
   now get power-of-2 clusters only.  Still, it's no worse than it was.

A few developers have looked this over already, but comments would be
greatly appreciated.

Index: dev/ic/aic7xxx.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/aic7xxx.c,v
retrieving revision 1.97
diff -c -r1.97 aic7xxx.c
*** dev/ic/aic7xxx.c	2003/04/20 12:54:05	1.97
--- dev/ic/aic7xxx.c	2003/04/20 17:00:12
***************
*** 4358,4364 ****
  		next_scb->flags = SCB_FREE;
  
  		error = bus_dmamap_create(ahc->parent_dmat, 
! 					  AHC_MAXTRANSFER_SIZE, AHC_NSEG, MAXBSIZE, 0,
  					  BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
  					  &next_scb->dmamap);
  		if (error != 0)
--- 4358,4364 ----
  		next_scb->flags = SCB_FREE;
  
  		error = bus_dmamap_create(ahc->parent_dmat, 
! 					  AHC_MAXTRANSFER_SIZE, AHC_NSEG, MAXPHYS, 0,
  					  BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
  					  &next_scb->dmamap);
  		if (error != 0)
Index: dev/ic/mpt_netbsd.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ic/mpt_netbsd.c,v
retrieving revision 1.4
diff -c -r1.4 mpt_netbsd.c
*** dev/ic/mpt_netbsd.c	2003/04/16 23:17:30	1.4
--- dev/ic/mpt_netbsd.c	2003/04/20 17:00:13
***************
*** 246,253 ****
  		req->sense_pbuf = (pptr - MPT_SENSE_SIZE);
  		req->sense_vbuf = (vptr - MPT_SENSE_SIZE);
  
! 		error = bus_dmamap_create(mpt->sc_dmat, MAXBSIZE,
! 		    MPT_SGL_MAX, MAXBSIZE, 0, 0, &req->dmap);
  		if (error) {
  			aprint_error("%s: unable to create req %d DMA map, "
  			    "error = %d\n", mpt->sc_dev.dv_xname, i, error);
--- 246,253 ----
  		req->sense_pbuf = (pptr - MPT_SENSE_SIZE);
  		req->sense_vbuf = (vptr - MPT_SENSE_SIZE);
  
! 		error = bus_dmamap_create(mpt->sc_dmat, MAXPHYS,
! 		    MPT_SGL_MAX, MAXPHYS, 0, 0, &req->dmap);
  		if (error) {
  			aprint_error("%s: unable to create req %d DMA map, "
  			    "error = %d\n", mpt->sc_dev.dv_xname, i, error);
Index: miscfs/genfs/genfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/genfs/genfs_vnops.c,v
retrieving revision 1.75
diff -c -r1.75 genfs_vnops.c
*** miscfs/genfs/genfs_vnops.c	2003/04/10 21:53:33	1.75
--- miscfs/genfs/genfs_vnops.c	2003/04/20 17:00:16
***************
*** 1035,1041 ****
  	off_t endoff = ap->a_offhi;
  	off_t off;
  	int flags = ap->a_flags;
! 	const int maxpages = MAXBSIZE >> PAGE_SHIFT;
  	int i, s, error, npages, nback;
  	int freeflag;
  	struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp;
--- 1035,1042 ----
  	off_t endoff = ap->a_offhi;
  	off_t off;
  	int flags = ap->a_flags;
! 	/* Even for strange MAXPHYS, the shift rounds down to a page */
! 	const int maxpages = MAXPHYS >> PAGE_SHIFT;
  	int i, s, error, npages, nback;
  	int freeflag;
  	struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp;
Index: ufs/ufs/ufs_bmap.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_bmap.c,v
retrieving revision 1.21
diff -c -r1.21 ufs_bmap.c
*** ufs/ufs/ufs_bmap.c	2003/04/02 10:39:44	1.21
--- ufs/ufs/ufs_bmap.c	2003/04/20 17:00:17
***************
*** 136,142 ****
  		 * don't create a block larger than the device can handle.
  		 */
  		*runp = 0;
! 		maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1;
  	}
  
  	if (bn >= 0 && bn < NDADDR) {
--- 136,142 ----
  		 * don't create a block larger than the device can handle.
  		 */
  		*runp = 0;
! 		maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1;
  	}
  
  	if (bn >= 0 && bn < NDADDR) {
Index: uvm/uvm_io.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_io.c,v
retrieving revision 1.17
diff -c -r1.17 uvm_io.c
*** uvm/uvm_io.c	2001/11/10 07:37:00	1.17
--- uvm/uvm_io.c	2003/04/20 17:00:17
***************
*** 94,100 ****
  		togo = togo - (endva - VM_MAXUSER_ADDRESS + 1);
  	pageoffset = baseva & PAGE_MASK;
  	baseva = trunc_page(baseva);
! 	chunksz = MIN(round_page(togo + pageoffset), MAXBSIZE);
  	error = 0;
  
  	/*
--- 94,100 ----
  		togo = togo - (endva - VM_MAXUSER_ADDRESS + 1);
  	pageoffset = baseva & PAGE_MASK;
  	baseva = trunc_page(baseva);
! 	chunksz = MIN(round_page(togo + pageoffset), trunc_page(MAXPHYS));
  	error = 0;
  
  	/*
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_map.c,v
retrieving revision 1.136
diff -c -r1.136 uvm_map.c
*** uvm/uvm_map.c	2003/04/09 21:39:29	1.136
--- uvm/uvm_map.c	2003/04/20 17:00:19
***************
*** 2231,2236 ****
--- 2231,2237 ----
  		case MADV_NORMAL:
  		case MADV_RANDOM:
  		case MADV_SEQUENTIAL:
+ 		case MADV_WILLNEED:
  			/* nothing special here */
  			break;
  
Index: uvm/uvm_pager.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.59
diff -c -r1.59 uvm_pager.c
*** uvm/uvm_pager.c	2002/11/09 20:09:52	1.59
--- uvm/uvm_pager.c	2003/04/20 17:00:20
***************
*** 95,101 ****
  	    FALSE, NULL);
  	simple_lock_init(&pager_map_wanted_lock);
  	pager_map_wanted = FALSE;
! 	emergva = uvm_km_valloc(kernel_map, MAXBSIZE);
  	emerginuse = FALSE;
  
  	/*
--- 95,101 ----
  	    FALSE, NULL);
  	simple_lock_init(&pager_map_wanted_lock);
  	pager_map_wanted = FALSE;
! 	emergva = uvm_km_valloc(kernel_map, round_page(MAXPHYS));
  	emerginuse = FALSE;
  
  	/*
***************
*** 162,168 ****
  			emerginuse = TRUE;
  			simple_unlock(&pager_map_wanted_lock);
  			kva = emergva;
! 			KASSERT(npages <= MAXBSIZE >> PAGE_SHIFT);
  			goto enter;
  		}
  		if ((flags & UVMPAGER_MAPIN_WAITOK) == 0) {
--- 162,169 ----
  			emerginuse = TRUE;
  			simple_unlock(&pager_map_wanted_lock);
  			kva = emergva;
! 			/* The shift implicitly truncates to PAGE_SIZE */
! 			KASSERT(npages <= (MAXPHYS >> PAGE_SHIFT));
  			goto enter;
  		}
  		if ((flags & UVMPAGER_MAPIN_WAITOK) == 0) {
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_pdaemon.c,v
retrieving revision 1.50
diff -c -r1.50 uvm_pdaemon.c
*** uvm/uvm_pdaemon.c	2003/02/25 00:22:20	1.50
--- uvm/uvm_pdaemon.c	2003/04/20 17:00:20
***************
*** 362,368 ****
  	struct vm_page *p, *nextpg = NULL; /* Quell compiler warning */
  	struct uvm_object *uobj;
  	struct vm_anon *anon;
! 	struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT];
  	struct simplelock *slock;
  	int swnpages, swcpages;
  	int swslot;
--- 362,368 ----
  	struct vm_page *p, *nextpg = NULL; /* Quell compiler warning */
  	struct uvm_object *uobj;
  	struct vm_anon *anon;
! 	struct vm_page *swpps[round_page(MAXPHYS) >> PAGE_SHIFT];
  	struct simplelock *slock;
  	int swnpages, swcpages;
  	int swslot;
***************
*** 621,627 ****
  			 */
  
  			if (swslot == 0) {
! 				swnpages = MAXBSIZE >> PAGE_SHIFT;
  				swslot = uvm_swap_alloc(&swnpages, TRUE);
  				if (swslot == 0) {
  					simple_unlock(slock);
--- 621,629 ----
  			 */
  
  			if (swslot == 0) {
! 				/* Even with strange MAXPHYS, the shift
! 				   implicitly rounds down to a page. */
! 				swnpages = MAXPHYS >> PAGE_SHIFT;
  				swslot = uvm_swap_alloc(&swnpages, TRUE);
  				if (swslot == 0) {
  					simple_unlock(slock);