Subject: Re: buffer cache memory management revision
To: None <tech-kern@netbsd.org>
From: Paul Kranenburg <pk@cs.few.eur.nl>
List: tech-kern
Date: 11/29/2003 13:05:19
Here's a snapshot of my buffer cache implementation changes. Included
are MD patches for the architectures whose names start with 'a', plus
i386 and sparc. The resulting kernel has been run on i386 and sparc.
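
The core of the change is in kern/vfs_bio.c: buffer data memory is no
longer wired into a fixed MAXBSIZE * nbuf virtual window at boot.
Instead, each buffer's storage is allocated on demand, and the total is
kept between a low and a high water mark derived from bufpages (which
defaults to the traditional 10%-of-the-first-2MB-plus-5%-of-the-rest
formula, or to the BUFCACHE percentage if that option is configured).
A condensed sketch of the allocation path, paraphrased from the
vfs_bio.c hunks below (not the literal patch code):

	/* Small buffers come from dedicated pools, larger ones from kernel VM. */
	static caddr_t
	buf_malloc(size_t size)
	{
		if (size <= 1024)
			return (pool_get(&buf1k, PR_WAITOK));
		if (size <= 2048)
			return (pool_get(&buf2k, PR_WAITOK));
		return ((caddr_t)uvm_km_alloc(kernel_map, size));
	}

allocbuf() uses this (and the matching buf_mrelease()) to resize a
buffer's data area, copying the old contents only when the caller asks
for it, and adds the size delta to a global bufmem counter.  When
bufmem exceeds bufmem_hiwater, allocbuf() reclaims buffers off the
queues (getnewbuf() with its new from_bufq argument) until bufmem
drops below bufmem_lowater.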

I would appreciate it if port-masters could look over the MD changes
(which mostly boil down to deleting calls to allocsys() and the special
buffer memory mapping code) and verify that they do the right thing.
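
For reference, the common shape of the MD change is illustrated below;
this is condensed from the machdep.c hunks that follow, and details
vary slightly per port, so take it as a sketch rather than literal code:

	void
	cpu_startup(void)
	{
		vaddr_t minaddr, maxaddr;

		/*
		 * Gone: the allocsys() sizing/allocation calls, the loop
		 * that uvm_map()s a MAXBSIZE * nbuf window and wires pages
		 * into it, the "using N buffers" printf, and the call to
		 * bufinit() (which now happens in init_main.c).
		 */
		minaddr = 0;
		exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
		    16 * NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);

		/* ... the rest of cpu_startup() is unchanged ... */
	}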

-pk

Index: arch/algor/algor/machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/algor/algor/machdep.c,v
retrieving revision 1.25
diff -c -r1.25 machdep.c
*** arch/algor/algor/machdep.c	26 Sep 2003 16:00:28 -0000	1.25
--- arch/algor/algor/machdep.c	29 Nov 2003 11:59:01 -0000
***************
*** 566,577 ****
  	mips_init_msgbuf();
  
  	/*
- 	 * Compute the size of system data structures.  pmap_bootstrap()
- 	 * needs some of this information.
- 	 */
- 	size = (vsize_t) allocsys(NULL, NULL);
- 
- 	/*
  	 * Initialize the virtual memory system.
  	 */
  	led_display('p', 'm', 'a', 'p');
--- 566,571 ----
***************
*** 588,603 ****
  	curpcb->pcb_context[11] = MIPS_INT_MASK | MIPS_SR_INT_IE; /* SR */
  
  	/*
- 	 * Allocate space for system data structures.  These data structures
- 	 * are allocated here instead of cpu_startup() because physical
- 	 * memory is directly addressable.  We don't have to map these into
- 	 * the virtual address space.
- 	 */
- 	v = (caddr_t) uvm_pageboot_alloc(size);
- 	if ((allocsys(v, NULL) - v) != size)
- 		panic("mach_init: table size inconsistency");
- 
- 	/*
  	 * Initialize debuggers, and break into them, if appropriate.
  	 */
  #if NKSYMS || defined(DDB) || defined(LKM)
--- 582,587 ----
***************
*** 629,636 ****
  void
  cpu_startup(void)
  {
- 	vsize_t size;
- 	u_int i, base, residual;
  	vaddr_t minaddr, maxaddr;
  	char pbuf[9];
  #ifdef DEBUG
--- 613,618 ----
***************
*** 672,716 ****
  	    }
  #endif
  
! 	/*
! 	 * Allocate virtual address space for file I/O buffers.
! 	 * Note they are different than the array of headers, 'buf',
! 	 * and usually occupy more virtual memory than physical.
! 	 */
! 	size = MAXBSIZE * nbuf;
! 	if (uvm_map(kernel_map, (vaddr_t *)(void *) &buffers, round_page(size),
! 		    NULL, UVM_UNKNOWN_OFFSET, 0,
! 		    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
! 				UVM_ADV_NORMAL, 0)) != 0)
! 		panic("cpu_startup: cannot allocate VM for buffers");
! 	base = bufpages / nbuf;
! 	residual = bufpages % nbuf;
! 	for (i = 0; i < nbuf; i++) {
! 		vsize_t curbufsize;
! 		vaddr_t curbuf;
! 		struct vm_page *pg;
! 
! 		/*
! 		 * Each buffer has MAXBSIZE bytes of VM space allocated.  Of
! 		 * that MAXBSIZE space, we allocate and map (base+1) pages
! 		 * for the first "residual" buffers, and then we allocate
! 		 * "base" pages for the rest.
! 		 */
! 		curbuf = (vaddr_t) buffers + (i * MAXBSIZE);
! 		curbufsize = PAGE_SIZE * ((i < residual) ? (base+1) : base);
! 
! 		while (curbufsize) {
! 			pg = uvm_pagealloc(NULL, 0, NULL, 0);
! 			if (pg == NULL)
! 				panic("cpu_startup: not enough memory for "
! 				    "buffer cache"); 
! 			pmap_kenter_pa(curbuf, VM_PAGE_TO_PHYS(pg),
! 			    VM_PROT_READ|VM_PROT_WRITE);
! 			curbuf += PAGE_SIZE;
! 			curbufsize -= PAGE_SIZE;
! 		}
! 	}
! 	pmap_update(pmap_kernel());
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
--- 654,660 ----
  	    }
  #endif
  
! 	minaddr = 0;
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
***************
*** 736,748 ****
  #endif
  	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
  	printf("avail memory = %s\n", pbuf);
- 	format_bytes(pbuf, sizeof(pbuf), bufpages * PAGE_SIZE);
- 	printf("using %u buffers containing %s of memory\n", nbuf, pbuf);
- 
- 	/*
- 	 * Set up buffers, so they can be used to read disklabels.
- 	 */
- 	bufinit();
  }
  
  int	waittime = -1;
--- 680,685 ----
Index: arch/alpha/alpha/machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/alpha/alpha/machdep.c,v
retrieving revision 1.277
diff -c -r1.277 machdep.c
*** arch/alpha/alpha/machdep.c	19 Oct 2003 17:45:35 -0000	1.277
--- arch/alpha/alpha/machdep.c	29 Nov 2003 11:59:02 -0000
***************
*** 235,241 ****
  	cpuid_t cpu_id;
  	struct cpu_info *ci;
  	char *p;
- 	caddr_t v;
  	const char *bootinfo_msg;
  	const struct cpuinit *c;
  
--- 235,240 ----
***************
*** 651,667 ****
  	    (struct user *)uvm_pageboot_alloc(UPAGES * PAGE_SIZE);
  
  	/*
- 	 * Allocate space for system data structures.  These data structures
- 	 * are allocated here instead of cpu_startup() because physical
- 	 * memory is directly addressable.  We don't have to map these into
- 	 * virtual address space.
- 	 */
- 	size = (vsize_t)allocsys(NULL, NULL);
- 	v = (caddr_t)uvm_pageboot_alloc(size);
- 	if ((allocsys(v, NULL) - v) != size)
- 		panic("alpha_init: table size inconsistency");
- 
- 	/*
  	 * Initialize the virtual memory system, and set the
  	 * page table base register in proc 0's PCB.
  	 */
--- 650,655 ----
***************
*** 856,864 ****
  void
  cpu_startup()
  {
- 	u_int i, base, residual;
  	vaddr_t minaddr, maxaddr;
- 	vsize_t size;
  	char pbuf[9];
  #if defined(DEBUG)
  	extern int pmapdebug;
--- 844,850 ----
***************
*** 887,931 ****
  		printf("WARNING: %s of memory with unknown purpose\n", pbuf);
  	}
  
! 	/*
! 	 * Allocate virtual address space for file I/O buffers.
! 	 * Note they are different than the array of headers, 'buf',
! 	 * and usually occupy more virtual memory than physical.
! 	 */
! 	size = MAXBSIZE * nbuf;
! 	if (uvm_map(kernel_map, (void *) &buffers, round_page(size),
! 		    NULL, UVM_UNKNOWN_OFFSET, 0,
! 		    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
! 				UVM_ADV_NORMAL, 0)) != 0)
! 		panic("startup: cannot allocate VM for buffers");
! 	base = bufpages / nbuf;
! 	residual = bufpages % nbuf;
! 	for (i = 0; i < nbuf; i++) {
! 		vsize_t curbufsize;
! 		vaddr_t curbuf;
! 		struct vm_page *pg;
! 
! 		/*
! 		 * Each buffer has MAXBSIZE bytes of VM space allocated.  Of
! 		 * that MAXBSIZE space, we allocate and map (base+1) pages
! 		 * for the first "residual" buffers, and then we allocate
! 		 * "base" pages for the rest.
! 		 */
! 		curbuf = (vaddr_t) buffers + (i * MAXBSIZE);
! 		curbufsize = PAGE_SIZE * ((i < residual) ? (base+1) : base);
! 
! 		while (curbufsize) {
! 			pg = uvm_pagealloc(NULL, 0, NULL, 0);
! 			if (pg == NULL)
! 				panic("cpu_startup: not enough memory for "
! 				    "buffer cache");
! 			pmap_kenter_pa(curbuf, VM_PAGE_TO_PHYS(pg),
! 					VM_PROT_READ|VM_PROT_WRITE);
! 			curbuf += PAGE_SIZE;
! 			curbufsize -= PAGE_SIZE;
! 		}
! 	}
! 	pmap_update(pmap_kernel());
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
--- 873,879 ----
  		printf("WARNING: %s of memory with unknown purpose\n", pbuf);
  	}
  
! 	minaddr = 0;
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
***************
*** 959,971 ****
  		printf("stolen memory for VM structures = %s\n", pbuf);
  	}
  #endif
- 	format_bytes(pbuf, sizeof(pbuf), bufpages * PAGE_SIZE);
- 	printf("using %u buffers containing %s of memory\n", nbuf, pbuf);
- 
- 	/*
- 	 * Set up buffers, so they can be used to read disk labels.
- 	 */
- 	bufinit();
  
  	/*
  	 * Set up the HWPCB so that it's safe to configure secondary
--- 907,912 ----
Index: arch/amd64/amd64/machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.14
diff -c -r1.14 machdep.c
*** arch/amd64/amd64/machdep.c	19 Oct 2003 18:14:42 -0000	1.14
--- arch/amd64/amd64/machdep.c	29 Nov 2003 11:59:02 -0000
***************
*** 253,299 ****
  	format_bytes(pbuf, sizeof(pbuf), ptoa(physmem));
  	printf("total memory = %s\n", pbuf);
  
! 	/*
! 	 * Find out how much space we need, allocate it,
! 	 * and then give everything true virtual addresses.
! 	 */
! 	sz = (unsigned long)allocsys(NULL, NULL);
! 	if ((v = (caddr_t)uvm_km_zalloc(kernel_map, round_page(sz))) == 0)
! 		panic("startup: no room for tables");
! 	v2 = allocsys(v, NULL);
! 	if ((v2 - v) != sz)
! 		panic("startup: table size inconsistency");
! 
! 	/*
! 	 * Allocate virtual address space for the buffers.  The area
! 	 * is not managed by the VM system.
! 	 */
! 	size = MAXBSIZE * nbuf;
! 	if (uvm_map(kernel_map, (vaddr_t *) (void *)&buffers, round_page(size),
! 		    NULL, UVM_UNKNOWN_OFFSET, 0,
! 		    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
! 				UVM_ADV_NORMAL, 0)) != 0)
! 		panic("cpu_startup: cannot allocate VM for buffers");
! 	minaddr = (vaddr_t)buffers;
! 	if ((bufpages / nbuf) >= btoc(MAXBSIZE)) {
! 		/* don't want to alloc more physical mem than needed */
! 		bufpages = btoc(MAXBSIZE) * nbuf;
! 	}
! 
! 	/*
! 	 * XXX We defer allocation of physical pages for buffers until
! 	 * XXX after autoconfiguration has run.  We must do this because
! 	 * XXX on system with large amounts of memory or with large
! 	 * XXX user-configured buffer caches, the buffer cache will eat
! 	 * XXX up all of the lower 16M of RAM.  This prevents ISA DMA
! 	 * XXX maps from allocating bounce pages.
! 	 *
! 	 * XXX Note that nothing can use buffer cache buffers until after
! 	 * XXX autoconfiguration completes!!
! 	 *
! 	 * XXX This is a hack, and needs to be replaced with a better
! 	 * XXX solution!  --thorpej@netbsd.org, December 6, 1997
! 	 */
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
--- 253,259 ----
  	format_bytes(pbuf, sizeof(pbuf), ptoa(physmem));
  	printf("total memory = %s\n", pbuf);
  
! 	minaddr = 0;
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
***************
*** 320,334 ****
  	lkm_map = &lkm_map_store;
  #endif
  
! 	/*
! 	 * XXX Buffer cache pages haven't yet been allocated, so
! 	 * XXX we need to account for those pages when printing
! 	 * XXX the amount of free memory.
! 	 */
! 	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free - bufpages));
  	printf("avail memory = %s\n", pbuf);
- 	format_bytes(pbuf, sizeof(pbuf), bufpages * PAGE_SIZE);
- 	printf("using %u buffers containing %s of memory\n", nbuf, pbuf);
  
  	/* Safe for i/o port / memory space allocation to use malloc now. */
  	x86_bus_space_mallocok();
--- 280,287 ----
  	lkm_map = &lkm_map_store;
  #endif
  
! 	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
  	printf("avail memory = %s\n", pbuf);
  
  	/* Safe for i/o port / memory space allocation to use malloc now. */
  	x86_bus_space_mallocok();
***************
*** 388,443 ****
          
          ci->ci_idle_tss_sel = tss_alloc(pcb);
  }       
- 
- /*
-  * XXX Finish up the deferred buffer cache allocation and initialization.
-  * XXXfvdl share.
-  */
- void
- x86_64_bufinit()
- {
- 	u_int i, base, residual;
- 
- 	base = bufpages / nbuf;
- 	residual = bufpages % nbuf;
- 	for (i = 0; i < nbuf; i++) {
- 		vsize_t curbufsize;
- 		vaddr_t curbuf;
- 		struct vm_page *pg;
- 
- 		/*
- 		 * Each buffer has MAXBSIZE bytes of VM space allocated.  Of
- 		 * that MAXBSIZE space, we allocate and map (base+1) pages
- 		 * for the first "residual" buffers, and then we allocate
- 		 * "base" pages for the rest.
- 		 */
- 		curbuf = (vaddr_t) buffers + (i * MAXBSIZE);
- 		curbufsize = PAGE_SIZE * ((i < residual) ? (base+1) : base);
- 
- 		while (curbufsize) {
- 			/*
- 			 * Attempt to allocate buffers from the first
- 			 * 16M of RAM to avoid bouncing file system
- 			 * transfers.
- 			 */
- 			pg = uvm_pagealloc_strat(NULL, 0, NULL, 0,
- 			    UVM_PGA_STRAT_FALLBACK, VM_FREELIST_FIRST16);
- 			if (pg == NULL)
- 				panic("cpu_startup: not enough memory for "
- 				    "buffer cache");
- 			pmap_kenter_pa(curbuf, VM_PAGE_TO_PHYS(pg),
- 			    VM_PROT_READ|VM_PROT_WRITE);
- 			curbuf += PAGE_SIZE;
- 			curbufsize -= PAGE_SIZE;
- 		}
- 	}
- 	pmap_update(pmap_kernel());
- 
- 	/*
- 	 * Set up buffers, so they can be used to read disk labels.
- 	 */
- 	bufinit();
- }
  
  
  /*  
--- 341,346 ----
Index: arch/amd64/amd64/autoconf.c
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/autoconf.c,v
retrieving revision 1.6
diff -c -r1.6 autoconf.c
*** arch/amd64/amd64/autoconf.c	8 Oct 2003 04:25:44 -0000	1.6
--- arch/amd64/amd64/autoconf.c	29 Nov 2003 11:59:02 -0000
***************
*** 122,130 ****
  
  	spl0();
  	lcr8(0);
- 
- 	/* XXX Finish deferred buffer cache allocation. */
- 	x86_64_bufinit();
  }
  
  void
--- 122,127 ----
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.2
diff -c -r1.2 cpu.h
*** arch/amd64/include/cpu.h	7 Aug 2003 16:26:36 -0000	1.2
--- arch/amd64/include/cpu.h	29 Nov 2003 11:59:02 -0000
***************
*** 278,284 ****
  int	cpu_maxproc __P((void));
  void	cpu_reset __P((void));
  void	x86_64_proc0_tss_ldt_init __P((void));
- void	x86_64_bufinit __P((void));
  void	x86_64_init_pcb_tss_ldt __P((struct cpu_info *));
  void	cpu_proc_fork __P((struct proc *, struct proc *));
  
--- 278,283 ----
Index: arch/amiga/amiga/machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/amiga/amiga/machdep.c,v
retrieving revision 1.186
diff -c -r1.186 machdep.c
*** arch/amiga/amiga/machdep.c	27 Sep 2003 19:34:17 -0000	1.186
--- arch/amiga/amiga/machdep.c	29 Nov 2003 11:59:03 -0000
***************
*** 261,268 ****
  void
  cpu_startup()
  {
- 	caddr_t v;
- 	u_int i, base, residual;
  	char pbuf[9];
  #ifdef DEBUG
  	extern int pmapdebug;
--- 261,266 ----
***************
*** 300,358 ****
  	format_bytes(pbuf, sizeof(pbuf), ctob(physmem));
  	printf("total memory = %s\n", pbuf);
  
! 	/*
! 	 * Find out how much space we need, allocate it,
! 	 * and then give everything true virtual addresses.
! 	 */
! 	size = (vm_size_t)allocsys(NULL, NULL);
! 	if ((v = (caddr_t)uvm_km_zalloc(kernel_map, round_page(size))) == 0)
! 		panic("startup: no room for tables");
! 	if (allocsys(v, NULL) - v != size)
! 		panic("startup: table size inconsistency");
! 
! 	/*
! 	 * Now allocate buffers proper.  They are different than the above
! 	 * in that they usually occupy more virtual memory than physical.
! 	 */
! 	size = MAXBSIZE * nbuf;
! 	if (uvm_map(kernel_map, (vm_offset_t *)(void *)&buffers, round_page(size),
! 	    NULL, UVM_UNKNOWN_OFFSET, 0,
! 	    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
! 	    UVM_ADV_NORMAL, 0)) != 0)
! 		panic("startup: cannot allocate VM for buffers");
! 	minaddr = (vm_offset_t) buffers;
! 	if ((bufpages / nbuf) >= btoc(MAXBSIZE)) {
! 		/* don't want to alloc more physical mem than needed */
! 		bufpages = btoc(MAXBSIZE) * nbuf;
! 	}
! 	base = bufpages / nbuf;
! 	residual = bufpages % nbuf;
! 	for (i = 0; i < nbuf; i++) {
! 		vm_size_t curbufsize;
! 		vm_offset_t curbuf;
! 		struct vm_page *pg;
! 
! 		/*
! 		 * Each buffer has MAXBSIZE bytes of VM space allocated.  Of
! 		 * that MAXBSIZE space, we allocate and map (base+1) pages
! 		 * for the first "residual" buffers, and then we allocate
! 		 * "base" pages for the rest.
! 		 */
! 		curbuf = (vm_offset_t) buffers + (i * MAXBSIZE);
! 		curbufsize = PAGE_SIZE * ((i < residual) ? (base+1) : base);
! 
! 		while (curbufsize) {
! 			pg = uvm_pagealloc(NULL, 0, NULL, 0);
! 			if (pg == NULL)
! 				panic("cpu_startup: not enough memory for "
! 				    "buffer cache");
! 			pmap_kenter_pa(curbuf, VM_PAGE_TO_PHYS(pg),
! 				       VM_PROT_READ|VM_PROT_WRITE);
! 			curbuf += PAGE_SIZE;
! 			curbufsize -= PAGE_SIZE;
! 		}
! 	}
! 	pmap_update(pmap_kernel());
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
--- 298,305 ----
  	format_bytes(pbuf, sizeof(pbuf), ctob(physmem));
  	printf("total memory = %s\n", pbuf);
  
! 
! 	minaddr = 0;
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
***************
*** 379,386 ****
  #endif
  	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
  	printf("avail memory = %s\n", pbuf);
- 	format_bytes(pbuf, sizeof(pbuf), bufpages * PAGE_SIZE);
- 	printf("using %u buffers containing %s of memory\n", nbuf, pbuf);
  
  	/*
  	 * display memory configuration passed from loadbsd
--- 326,331 ----
***************
*** 401,415 ****
  
  #ifdef DEBUG_KERNEL_START
  	printf("survived initcpu...\n");
- #endif
- 
- 	/*
- 	 * Set up buffers, so they can be used to read disk labels.
- 	 */
- 	bufinit();
- 
- #ifdef DEBUG_KERNEL_START
- 	printf("survived bufinit...\n");
  #endif
  }
  
--- 346,351 ----
Index: arch/amigappc/amigappc/machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/amigappc/amigappc/machdep.c,v
retrieving revision 1.25
diff -c -r1.25 machdep.c
*** arch/amigappc/amigappc/machdep.c	14 Jul 2003 23:40:34 -0000	1.25
--- arch/amigappc/amigappc/machdep.c	29 Nov 2003 11:59:03 -0000
***************
*** 702,711 ****
  void
  cpu_startup()
  {
- 	u_int i, base, residual;
  	caddr_t	v;
  	vaddr_t minaddr, maxaddr;
- 	vsize_t size;
  	char pbuf[9];
  
  	initmsgbuf((caddr_t)msgbuf_paddr, round_page(MSGBUFSIZE));
--- 702,709 ----
***************
*** 719,782 ****
  	format_bytes(pbuf, sizeof(pbuf), ctob(physmem));
  	printf("total memory = %s\n", pbuf);
  
- 	/*
- 	 * Find out how much space we need, allocate it,
- 	 * and then give everything true virtual addresses
- 	 */
- 	size = (int)allocsys(NULL, NULL);
- 	if ((v = (caddr_t)uvm_km_zalloc(kernel_map, round_page(size))) == 0) {
- 		panic("startup: no room for tables");
- 	}
- 	if (allocsys(v, NULL) - v != size) {
- 		panic("startup: table size inconsistency");
- 	}
- 
- 	/*
- 	 * Now allocate buffers proper; they are different than the above
- 	 * in that they usually occupy more virtual memory than physical
- 	 */
- 	size = MAXBSIZE * nbuf;
  	minaddr = 0;
- 	if (uvm_map(kernel_map, (vaddr_t *)&minaddr, round_page(size), NULL,
- 		UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE,
- 		UVM_INH_NONE, UVM_ADV_NORMAL, 0)) != 0) {
- 		panic("startup: cannot allocate VM for buffers");
- 	}
- 	buffers = (char *)minaddr;
- 	base = bufpages / nbuf;
- 	residual = bufpages % nbuf;
- 	if (base >= MAXBSIZE) {
- 		/* Don't want to alloc more physical mem than ever needed */
- 		base = MAXBSIZE;
- 		residual = 0;
- 	}
- 	for (i = 0; i < nbuf; i++) {
- 		vsize_t curbufsize;
- 		vaddr_t curbuf;
- 		struct vm_page *pg;
- 
- 		/*
- 		 * Each buffer has MAXBSIZE bytes of VM space allocated.
- 		 * Of that MAXBSIZE space, we allocate and map (base+1) pages
- 		 * for the first "residual" buffers, and then we allocate
- 		 * "base" pages for the rest.
- 		 */
- 		curbuf = (vaddr_t)buffers + i * MAXBSIZE;
- 		curbufsize = PAGE_SIZE * (i < residual ? base + 1 : base);
- 
- 		while (curbufsize) {
- 			pg = uvm_pagealloc(NULL, 0, NULL, 0);
- 			if (pg == NULL) {
- 				panic("cpu_startup: not enough memory for "
- 					"buffer cache");
- 			}
- 			pmap_kenter_pa(curbuf, VM_PAGE_TO_PHYS(pg),
- 			    VM_PROT_READ | VM_PROT_WRITE);
- 			curbuf += PAGE_SIZE;
- 			curbufsize -= PAGE_SIZE;
- 		}
- 	}
- 	pmap_update(kernel_map->pmap);
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
--- 717,723 ----
***************
*** 799,811 ****
  
  	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
  	printf("avail memory = %s\n", pbuf);
- 	format_bytes(pbuf, sizeof(pbuf), bufpages * PAGE_SIZE);
- 	printf("using %u buffers containing %s of memory\n", nbuf, pbuf);
- 
- 	/*
- 	 * Set up the buffers, so they can be used to read disk labels
- 	 */
- 	bufinit();
  }
  
  /*
--- 740,745 ----
Index: arch/arc/arc/machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/arc/arc/machdep.c,v
retrieving revision 1.81
diff -c -r1.81 machdep.c
*** arch/arc/arc/machdep.c	26 Sep 2003 16:00:28 -0000	1.81
--- arch/arc/arc/machdep.c	29 Nov 2003 11:59:03 -0000
***************
*** 243,249 ****
  	int i;
  	paddr_t kernstartpfn, kernendpfn, first, last;
  	caddr_t kernend, v;
- 	vsize_t size;
  
  	/* clear the BSS segment in kernel code */
  	kernend = (caddr_t)mips_round_page(end);
--- 243,248 ----
***************
*** 456,467 ****
  	mips_init_msgbuf();
  
  	/*
- 	 * Compute the size of system data structures.  pmap_bootstrap()
- 	 * needs some of this information.
- 	 */
- 	size = (vsize_t)allocsys(NULL, NULL);
- 
- 	/*
  	 * Initialize the virtual memory system.
  	 */
  	pmap_bootstrap();
--- 455,460 ----
***************
*** 474,489 ****
  	lwp0.l_md.md_regs = (struct frame *)(v + USPACE) - 1;
  	curpcb = &lwp0.l_addr->u_pcb;
  	curpcb->pcb_context[11] = MIPS_INT_MASK | MIPS_SR_INT_IE; /* SR */
- 
- 	/*
- 	 * Allocate space for system data structures.  These data structures
- 	 * are allocated here instead of cpu_startup() because physical
- 	 * memory is directly addressable.  We don't have to map these into
- 	 * virtual address space.
- 	 */
- 	v = (caddr_t)uvm_pageboot_alloc(size);
- 	if ((allocsys(v, NULL) - v) != size)
- 		panic("mach_init: table size inconsistency");
  }
  
  void
--- 467,472 ----
***************
*** 536,544 ****
  void
  cpu_startup()
  {
- 	u_int i, base, residual;
  	vaddr_t minaddr, maxaddr;
- 	vsize_t size;
  	char pbuf[9];
  #ifdef DEBUG
  	extern int pmapdebug;
--- 519,525 ----
***************
*** 555,606 ****
  	format_bytes(pbuf, sizeof(pbuf), ctob(physmem));
  	printf("total memory = %s\n", pbuf);
  
! 	/*
! 	 * Allocate virtual address space for file I/O buffers.
! 	 * Note they are different than the array of headers, 'buf',
! 	 * and usually occupy more virtual memory than physical.
! 	 */
! 	size = MAXBSIZE * nbuf;
! 	if (uvm_map(kernel_map, (vaddr_t *)(void *)&buffers, round_page(size),
! 		    NULL, UVM_UNKNOWN_OFFSET, 0,
! 		    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
! 				UVM_ADV_NORMAL, 0)) != 0)
! 		panic("cpu_startup: cannot allocate VM for buffers");
! 
! 	minaddr = (vaddr_t)buffers;
! 	if ((bufpages / nbuf) >= btoc(MAXBSIZE)) {
! 		bufpages = btoc(MAXBSIZE) * nbuf; /* do not overallocate RAM */
! 	}
! 	base = bufpages / nbuf;
! 	residual = bufpages % nbuf;
! 
! 	/* now allocate RAM for buffers */
! 	for (i = 0; i < nbuf; i++) {
! 		vsize_t curbufsize;
! 		vaddr_t curbuf;
! 		struct vm_page *pg;
! 
! 		/*
! 		 * Each buffer has MAXBSIZE bytes of VM space allocated.  Of
! 		 * that MAXBSIZE space, we allocate and map (base+1) pages
! 		 * for the first "residual" buffers, and then we allocate
! 		 * "base" pages for the rest.
! 		 */
! 		curbuf = (vaddr_t)buffers + (i * MAXBSIZE);
! 		curbufsize = PAGE_SIZE * ((i < residual) ? (base+1) : base);
! 
! 		while (curbufsize) {
! 			pg = uvm_pagealloc(NULL, 0, NULL, 0);
! 			if (pg == NULL)
! 				panic("cpu_startup: not enough memory for "
! 				    "buffer cache");
! 			pmap_kenter_pa(curbuf, VM_PAGE_TO_PHYS(pg),
! 				       VM_PROT_READ|VM_PROT_WRITE);
! 			curbuf += PAGE_SIZE;
! 			curbufsize -= PAGE_SIZE;
! 		}
! 	}
! 	pmap_update(pmap_kernel());
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
--- 536,542 ----
  	format_bytes(pbuf, sizeof(pbuf), ctob(physmem));
  	printf("total memory = %s\n", pbuf);
  
! 	minaddr = 0;
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
***************
*** 626,638 ****
  #endif
  	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
  	printf("avail memory = %s\n", pbuf);
- 	format_bytes(pbuf, sizeof(pbuf), bufpages * PAGE_SIZE);
- 	printf("using %u buffers containing %s of memory\n", nbuf, pbuf);
- 
- 	/*
- 	 * Set up buffers, so they can be used to read disk labels.
- 	 */
- 	bufinit();
  }
  
  int	waittime = -1;
--- 562,567 ----
Index: arch/arm/arm32/arm32_machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/arm/arm32/arm32_machdep.c,v
retrieving revision 1.38
diff -c -r1.38 arm32_machdep.c
*** arch/arm/arm32/arm32_machdep.c	21 Sep 2003 00:26:09 -0000	1.38
--- arch/arm/arm32/arm32_machdep.c	29 Nov 2003 11:59:03 -0000
***************
*** 209,220 ****
  void
  cpu_startup()
  {
! 	paddr_t minaddr;
! 	paddr_t maxaddr;
! 	caddr_t sysbase;
! 	caddr_t size;
! 	vsize_t bufsize;
! 	u_int loop, base, residual;
  	char pbuf[9];
  
  	/* Set the cpu control register */
--- 209,217 ----
  void
  cpu_startup()
  {
! 	vaddr_t minaddr;
! 	vaddr_t maxaddr;
! 	u_int loop;
  	char pbuf[9];
  
  	/* Set the cpu control register */
***************
*** 249,310 ****
  	format_bytes(pbuf, sizeof(pbuf), arm_ptob(physmem));
  	printf("total memory = %s\n", pbuf);
  
! 	/*
! 	 * Find out how much space we need, allocate it,
! 	 * and then give everything true virtual addresses.
! 	 */
! 	size = allocsys(NULL, NULL);
! 	sysbase = (caddr_t)uvm_km_zalloc(kernel_map, round_page((vaddr_t)size));
! 	if (sysbase == 0)
! 		panic(
! 		    "cpu_startup: no room for system tables; %d bytes required",
! 		    (u_int)size);
! 	if ((caddr_t)((allocsys(sysbase, NULL) - sysbase)) != size)
! 		panic("cpu_startup: system table size inconsistency");
! 
!    	/*
! 	 * Now allocate buffers proper.  They are different than the above
! 	 * in that they usually occupy more virtual memory than physical.
! 	 */
! 	bufsize = MAXBSIZE * nbuf;
! 	if (uvm_map(kernel_map, (void *)&buffers, round_page(bufsize),
! 	    NULL, UVM_UNKNOWN_OFFSET, 0,
! 	    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
! 	    UVM_ADV_NORMAL, 0)) != 0)
! 		panic("cpu_startup: cannot allocate UVM space for buffers");
! 	minaddr = (vaddr_t)buffers;
! 	if ((bufpages / nbuf) >= btoc(MAXBSIZE)) {
! 		/* don't want to alloc more physical mem than needed */
! 		bufpages = btoc(MAXBSIZE) * nbuf;
! 	}
! 
! 	base = bufpages / nbuf;
! 	residual = bufpages % nbuf;
! 	for (loop = 0; loop < nbuf; ++loop) {
! 		vsize_t curbufsize;
! 		vaddr_t curbuf;
! 		struct vm_page *pg;
! 
! 		/*
! 		 * Each buffer has MAXBSIZE bytes of VM space allocated.  Of
! 		 * that MAXBSIZE space, we allocate and map (base+1) pages
! 		 * for the first "residual" buffers, and then we allocate
! 		 * "base" pages for the rest.
! 		 */
! 		curbuf = (vaddr_t) buffers + (loop * MAXBSIZE);
! 		curbufsize = PAGE_SIZE * ((loop < residual) ? (base+1) : base);
! 
! 		while (curbufsize) {
! 			pg = uvm_pagealloc(NULL, 0, NULL, 0);
! 			if (pg == NULL)
! 				panic("cpu_startup: not enough memory for buffer cache");
! 			pmap_kenter_pa(curbuf, VM_PAGE_TO_PHYS(pg),
! 				VM_PROT_READ|VM_PROT_WRITE);
! 			curbuf += PAGE_SIZE;
! 			curbufsize -= PAGE_SIZE;
! 		}
! 	}
! 	pmap_update(pmap_kernel());
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
--- 246,252 ----
  	format_bytes(pbuf, sizeof(pbuf), arm_ptob(physmem));
  	printf("total memory = %s\n", pbuf);
  
! 	minaddr = 0;
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
***************
*** 328,340 ****
  
  	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
  	printf("avail memory = %s\n", pbuf);
- 	format_bytes(pbuf, sizeof(pbuf), bufpages * PAGE_SIZE);
- 	printf("using %u buffers containing %s of memory\n", nbuf, pbuf);
- 
- 	/*
- 	 * Set up buffers, so they can be used to read disk labels.
- 	 */
- 	bufinit();
  
  	curpcb = &lwp0.l_addr->u_pcb;
  	curpcb->pcb_flags = 0;
--- 270,275 ----
Index: arch/atari/atari/machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/atari/atari/machdep.c,v
retrieving revision 1.130
diff -c -r1.130 machdep.c
*** arch/atari/atari/machdep.c	27 Sep 2003 20:01:58 -0000	1.130
--- arch/atari/atari/machdep.c	29 Nov 2003 11:59:03 -0000
***************
*** 230,237 ****
  {
  	extern	 void		etext __P((void));
  	extern	 int		iomem_malloc_safe;
- 		 caddr_t	v;
- 		 u_int		i, base, residual;
  		 char		pbuf[9];
  
  #ifdef DEBUG
--- 230,235 ----
***************
*** 239,245 ****
  		 int		opmapdebug = pmapdebug;
  #endif
  		 vaddr_t	minaddr, maxaddr;
- 		 vsize_t	size = 0;
  	extern	 vsize_t	mem_size;	/* from pmap.c */
  
  #ifdef DEBUG
--- 237,242 ----
***************
*** 258,316 ****
  	format_bytes(pbuf, sizeof(pbuf), mem_size);
  	printf("total memory = %s\n", pbuf);
  
! 	/*
! 	 * Find out how much space we need, allocate it,
! 	 * and then give everything true virtual addresses.
! 	 */
! 	size = (int)allocsys(NULL, NULL);
! 	if ((v = (caddr_t)uvm_km_zalloc(kernel_map, round_page(size))) == 0)
! 		panic("startup: no room for tables");
! 	if (allocsys(v, NULL) - v != size)
! 		panic("startup: table size inconsistency");
! 
! 	/*
! 	 * Now allocate buffers proper.  They are different than the above
! 	 * in that they usually occupy more virtual memory than physical.
! 	 */
! 	size = MAXBSIZE * nbuf;
! 	if (uvm_map(kernel_map, (vaddr_t *)(void *)&buffers, round_page(size),
! 		    NULL, UVM_UNKNOWN_OFFSET, 0,
! 		    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
! 				UVM_ADV_NORMAL, 0)) != 0)
! 		panic("startup: cannot allocate VM for buffers");
! 	minaddr = (vaddr_t)buffers;
! 	if ((bufpages / nbuf) >= btoc(MAXBSIZE)) {
! 		/* don't want to alloc more physical mem than needed */
! 		bufpages = btoc(MAXBSIZE) * nbuf;
! 	}
! 	base = bufpages / nbuf;
! 	residual = bufpages % nbuf;
! 	for (i = 0; i < nbuf; i++) {
! 		vsize_t curbufsize;
! 		vaddr_t curbuf;
! 		struct vm_page *pg;
! 
! 		/*
! 		 * Each buffer has MAXBSIZE bytes of VM space allocated.  Of
! 		 * that MAXBSIZE space, we allocate and map (base+1) pages
! 		 * for the first "residual" buffers, and then we allocate
! 		 * "base" pages for the rest.
! 		 */
! 		curbuf = (vaddr_t) buffers + (i * MAXBSIZE);
! 		curbufsize = PAGE_SIZE * ((i < residual) ? (base+1) : base);
! 
! 		while (curbufsize) {
! 			pg = uvm_pagealloc(NULL, 0, NULL, 0);
! 			if (pg == NULL) 
! 				panic("cpu_startup: not enough memory for "
! 				    "buffer cache");
! 			pmap_kenter_pa(curbuf, VM_PAGE_TO_PHYS(pg),
! 			    VM_PROT_READ | VM_PROT_WRITE);
! 			curbuf += PAGE_SIZE;
! 			curbufsize -= PAGE_SIZE;
! 		}
! 	}
! 	pmap_update(kernel_map->pmap);
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
--- 255,261 ----
  	format_bytes(pbuf, sizeof(pbuf), mem_size);
  	printf("total memory = %s\n", pbuf);
  
! 	minaddr = 0;
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
***************
*** 357,369 ****
  #endif
  	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
  	printf("avail memory = %s\n", pbuf);
- 	format_bytes(pbuf, sizeof(pbuf), bufpages * PAGE_SIZE);
- 	printf("using %u buffers containing %s of memory\n", nbuf, pbuf);
- 
- 	/*
- 	 * Set up buffers, so they can be used to read disk labels.
- 	 */
- 	bufinit();
  
  	/*
  	 * Alloc extent allocation to use malloc
--- 302,307 ----
Index: arch/i386/i386/machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/machdep.c,v
retrieving revision 1.543
diff -c -r1.543 machdep.c
*** arch/i386/i386/machdep.c	28 Oct 2003 22:52:53 -0000	1.543
--- arch/i386/i386/machdep.c	29 Nov 2003 11:59:04 -0000
***************
*** 282,291 ****
  void
  cpu_startup()
  {
! 	caddr_t v;
! 	int sz, x;
  	vaddr_t minaddr, maxaddr;
- 	vsize_t size;
  	char pbuf[9];
  
  	/*
--- 282,289 ----
  void
  cpu_startup()
  {
! 	int x;
  	vaddr_t minaddr, maxaddr;
  	char pbuf[9];
  
  	/*
***************
*** 315,360 ****
  	format_bytes(pbuf, sizeof(pbuf), ptoa(physmem));
  	printf("total memory = %s\n", pbuf);
  
! 	/*
! 	 * Find out how much space we need, allocate it,
! 	 * and then give everything true virtual addresses.
! 	 */
! 	sz = (int)allocsys(NULL, NULL);
! 	if ((v = (caddr_t)uvm_km_zalloc(kernel_map, round_page(sz))) == 0)
! 		panic("startup: no room for tables");
! 	if (allocsys(v, NULL) - v != sz)
! 		panic("startup: table size inconsistency");
! 
! 	/*
! 	 * Allocate virtual address space for the buffers.  The area
! 	 * is not managed by the VM system.
! 	 */
! 	size = MAXBSIZE * nbuf;
! 	if (uvm_map(kernel_map, (vaddr_t *)(void *) &buffers, round_page(size),
! 		    NULL, UVM_UNKNOWN_OFFSET, 0,
! 		    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
! 				UVM_ADV_NORMAL, 0)) != 0)
! 		panic("cpu_startup: cannot allocate VM for buffers");
! 	minaddr = (vaddr_t)buffers;
! 	if ((bufpages / nbuf) >= btoc(MAXBSIZE)) {
! 		/* don't want to alloc more physical mem than needed */
! 		bufpages = btoc(MAXBSIZE) * nbuf;
! 	}
! 
! 	/*
! 	 * XXX We defer allocation of physical pages for buffers until
! 	 * XXX after autoconfiguration has run.  We must do this because
! 	 * XXX on system with large amounts of memory or with large
! 	 * XXX user-configured buffer caches, the buffer cache will eat
! 	 * XXX up all of the lower 16M of RAM.  This prevents ISA DMA
! 	 * XXX maps from allocating bounce pages.
! 	 *
! 	 * XXX Note that nothing can use buffer cache buffers until after
! 	 * XXX autoconfiguration completes!!
! 	 *
! 	 * XXX This is a hack, and needs to be replaced with a better
! 	 * XXX solution!  --thorpej@netbsd.org, December 6, 1997
! 	 */
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
--- 313,319 ----
  	format_bytes(pbuf, sizeof(pbuf), ptoa(physmem));
  	printf("total memory = %s\n", pbuf);
  
! 	minaddr = 0;
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
***************
*** 375,389 ****
  	mb_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
  	    nmbclusters * mclbytes, VM_MAP_INTRSAFE, FALSE, NULL);
  
! 	/*
! 	 * XXX Buffer cache pages haven't yet been allocated, so
! 	 * XXX we need to account for those pages when printing
! 	 * XXX the amount of free memory.
! 	 */
! 	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free - bufpages));
  	printf("avail memory = %s\n", pbuf);
- 	format_bytes(pbuf, sizeof(pbuf), bufpages * PAGE_SIZE);
- 	printf("using %d buffers containing %s of memory\n", nbuf, pbuf);
  
  	/* Safe for i/o port / memory space allocation to use malloc now. */
  	x86_bus_space_mallocok();
--- 334,341 ----
  	mb_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
  	    nmbclusters * mclbytes, VM_MAP_INTRSAFE, FALSE, NULL);
  
! 	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
  	printf("avail memory = %s\n", pbuf);
  
  	/* Safe for i/o port / memory space allocation to use malloc now. */
  	x86_bus_space_mallocok();
***************
*** 439,493 ****
  	pcb->pcb_cr0 = rcr0();
  
  	ci->ci_idle_tss_sel = tss_alloc(pcb);
- }
- 
- /*
-  * XXX Finish up the deferred buffer cache allocation and initialization.
-  */
- void
- i386_bufinit()
- {
- 	int i, base, residual;
- 
- 	base = bufpages / nbuf;
- 	residual = bufpages % nbuf;
- 	for (i = 0; i < nbuf; i++) {
- 		vsize_t curbufsize;
- 		vaddr_t curbuf;
- 		struct vm_page *pg;
- 
- 		/*
- 		 * Each buffer has MAXBSIZE bytes of VM space allocated.  Of
- 		 * that MAXBSIZE space, we allocate and map (base+1) pages
- 		 * for the first "residual" buffers, and then we allocate
- 		 * "base" pages for the rest.
- 		 */
- 		curbuf = (vaddr_t) buffers + (i * MAXBSIZE);
- 		curbufsize = PAGE_SIZE * ((i < residual) ? (base+1) : base);
- 
- 		while (curbufsize) {
- 			/*
- 			 * Attempt to allocate buffers from the first
- 			 * 16M of RAM to avoid bouncing file system
- 			 * transfers.
- 			 */
- 			pg = uvm_pagealloc_strat(NULL, 0, NULL, 0,
- 			    UVM_PGA_STRAT_FALLBACK, VM_FREELIST_FIRST16);
- 			if (pg == NULL)
- 				panic("cpu_startup: not enough memory for "
- 				    "buffer cache");
- 			pmap_kenter_pa(curbuf, VM_PAGE_TO_PHYS(pg),
- 			    VM_PROT_READ|VM_PROT_WRITE);
- 			curbuf += PAGE_SIZE;
- 			curbufsize -= PAGE_SIZE;
- 		}
- 	}
- 	pmap_update(pmap_kernel());
- 
- 	/*
- 	 * Set up buffers, so they can be used to read disk labels.
- 	 */
- 	bufinit();
  }
  
  /*
--- 391,396 ----
Index: arch/i386/i386/autoconf.c
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/autoconf.c,v
retrieving revision 1.74
diff -c -r1.74 autoconf.c
*** arch/i386/i386/autoconf.c	27 Oct 2003 14:11:46 -0000	1.74
--- arch/i386/i386/autoconf.c	29 Nov 2003 11:59:04 -0000
***************
*** 154,162 ****
  #if NLAPIC > 0
  	lapic_tpr = 0;
  #endif
- 
- 	/* XXX Finish deferred buffer cache allocation. */
- 	i386_bufinit();
  }
  
  void
--- 154,159 ----
Index: arch/i386/include/cpu.h
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/include/cpu.h,v
retrieving revision 1.109
diff -c -r1.109 cpu.h
*** arch/i386/include/cpu.h	27 Oct 2003 13:44:20 -0000	1.109
--- arch/i386/include/cpu.h	29 Nov 2003 11:59:04 -0000
***************
*** 351,357 ****
  void	cpu_reset(void);
  void	i386_init_pcb_tss_ldt(struct cpu_info *);
  void	i386_proc0_tss_ldt_init(void);
- void	i386_bufinit(void);
  
  /* identcpu.c */
  extern int tmx86_has_longrun;
--- 351,356 ----
Index: arch/sparc/sparc/machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/sparc/sparc/machdep.c,v
retrieving revision 1.240
diff -c -r1.240 machdep.c
*** arch/sparc/sparc/machdep.c	28 Oct 2003 15:25:27 -0000	1.240
--- arch/sparc/sparc/machdep.c	29 Nov 2003 11:59:05 -0000
***************
*** 162,177 ****
  void	dumpsys __P((void));
  void	stackdump __P((void));
  
- caddr_t	mdallocsys __P((caddr_t));
- 
  /*
   * Machine-dependent startup code
   */
  void
  cpu_startup()
  {
- 	caddr_t v;
- 	u_int i, base, residual;
  #ifdef DEBUG
  	extern int pmapdebug;
  	int opmapdebug = pmapdebug;
--- 162,173 ----
***************
*** 286,355 ****
  	}
  
  	/*
- 	 * Find out how much space we need, allocate it,
- 	 * and then give everything true virtual addresses.
- 	 */
- 	size = (vsize_t)allocsys(NULL, mdallocsys);
- 
- 	if ((v = (caddr_t)uvm_km_alloc(kernel_map, round_page(size))) == 0)
- 		panic("startup: no room for tables");
- 
- 	if ((vsize_t)(allocsys(v, mdallocsys) - v) != size)
- 		panic("startup: table size inconsistency");
- 
-         /*
-          * allocate virtual and physical memory for the buffers.
-          */
-         size = MAXBSIZE * nbuf;         /* # bytes for buffers */
- 
-         /* allocate VM for buffers... area is not managed by VM system */
-         if (uvm_map(kernel_map, (void *)&buffers, round_page(size),
-                     NULL, UVM_UNKNOWN_OFFSET, 0,
-                     UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
-                                 UVM_ADV_NORMAL, 0)) != 0)
-         	panic("cpu_startup: cannot allocate VM for buffers");
- 
-         minaddr = (vaddr_t) buffers;
-         if ((bufpages / nbuf) >= btoc(MAXBSIZE)) {
-         	bufpages = btoc(MAXBSIZE) * nbuf; /* do not overallocate RAM */
-         }
-         base = bufpages / nbuf;
-         residual = bufpages % nbuf;
- 
-         /* now allocate RAM for buffers */
- 	for (i = 0 ; i < nbuf ; i++) {
- 		vaddr_t curbuf;
- 		vsize_t curbufsize;
- 		struct vm_page *pg;
- 
- 		/*
- 		 * each buffer has MAXBSIZE bytes of VM space allocated.  of
- 		 * that MAXBSIZE space we allocate and map (base+1) pages
- 		 * for the first "residual" buffers, and then we allocate
- 		 * "base" pages for the rest.
- 		 */
- 		curbuf = (vaddr_t) buffers + (i * MAXBSIZE);
- 		curbufsize = PAGE_SIZE * ((i < residual) ? (base+1) : base);
- 
- 		while (curbufsize) {
- 			pg = uvm_pagealloc(NULL, 0, NULL, 0);
- 			if (pg == NULL)
- 				panic("cpu_startup: "
- 				    "not enough RAM for buffer cache");
- 			pmap_kenter_pa(curbuf, VM_PAGE_TO_PHYS(pg),
- 			    VM_PROT_READ | VM_PROT_WRITE);
- 			curbuf += PAGE_SIZE;
- 			curbufsize -= PAGE_SIZE;
- 		}
- 	}
- 	pmap_update(pmap_kernel());
- 
- 	/*
  	 * Allocate a submap for exec arguments.  This map effectively
  	 * limits the number of processes exec'ing at any time.
  	 */
!         exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
!                                  16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);
  
  	if (CPU_ISSUN4 || CPU_ISSUN4C) {
  		/*
--- 282,293 ----
  	}
  
  	/*
  	 * Allocate a submap for exec arguments.  This map effectively
  	 * limits the number of processes exec'ing at any time.
  	 */
! 	minaddr = 0;
! 	exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
! 				   16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);
  
  	if (CPU_ISSUN4 || CPU_ISSUN4C) {
  		/*
***************
*** 374,402 ****
  #endif
  	format_bytes(pbuf, sizeof(pbuf), ptoa(uvmexp.free));
  	printf("avail memory = %s\n", pbuf);
- 	format_bytes(pbuf, sizeof(pbuf), bufpages * PAGE_SIZE);
- 	printf("using %u buffers containing %s of memory\n", nbuf, pbuf);
- 
- 	/*
- 	 * Set up buffers, so they can be used to read disk labels.
- 	 */
- 	bufinit();
  
  	pmap_redzone();
  }
  
- caddr_t
- mdallocsys(v)
- 	caddr_t v;
- {
- 
- 	/* Clip bufpages if necessary. */
- 	if (CPU_ISSUN4C && bufpages > (128 * (65536/MAXBSIZE)))
- 		bufpages = (128 * (65536/MAXBSIZE));
- 
- 	return (v);
- }
- 
  /*
   * Set up registers on exec.
   *
--- 312,321 ----
***************
*** 857,863 ****
   *
   *	Send an an upcall to userland.
   */
! void 
  cpu_upcall(struct lwp *l, int type, int nevents, int ninterrupted,
  	   void *sas, void *ap, void *sp, sa_upcall_t upcall)
  {
--- 776,782 ----
   *
   *	Send an an upcall to userland.
   */
! void
  cpu_upcall(struct lwp *l, int type, int nevents, int ninterrupted,
  	   void *sas, void *ap, void *sp, sa_upcall_t upcall)
  {
Index: conf/files
===================================================================
RCS file: /cvsroot/src/sys/conf/files,v
retrieving revision 1.644
diff -c -r1.644 files
*** conf/files	16 Nov 2003 12:10:41 -0000	1.644
--- conf/files	29 Nov 2003 11:59:05 -0000
***************
*** 1112,1118 ****
  file	kern/init_main.c
  file	kern/init_sysent.c
  file	kern/kern_acct.c
- file	kern/kern_allocsys.c
  file	kern/kern_clock.c
  file	kern/kern_descrip.c
  file	kern/kern_event.c
--- 1112,1117 ----
Index: conf/param.c
===================================================================
RCS file: /cvsroot/src/sys/conf/param.c,v
retrieving revision 1.43
diff -c -r1.43 param.c
*** conf/param.c	30 Oct 2003 20:37:01 -0000	1.43
--- conf/param.c	29 Nov 2003 11:59:05 -0000
***************
*** 186,199 ****
  #endif
  
  /*
-  * These have to be allocated somewhere; allocating
-  * them here forces loader errors if this file is omitted
-  * (if they've been externed everywhere else; hah!).
-  */
- struct	buf *buf;
- char	*buffers;
- 
- /*
   * These control when and to what priority a process gets after a certain
   * amount of CPU time expires.  AUTONICETIME is in seconds.
   * AUTONICEVAL is NOT offset by NZERO, i.e. it's between PRIO_MIN and PRIO_MAX.
--- 186,191 ----
Index: kern/init_main.c
===================================================================
RCS file: /cvsroot/src/sys/kern/init_main.c,v
retrieving revision 1.227
diff -c -r1.227 init_main.c
*** kern/init_main.c	14 Nov 2003 07:13:25 -0000	1.227
--- kern/init_main.c	29 Nov 2003 11:59:05 -0000
***************
*** 258,263 ****
--- 258,266 ----
  	/* Initialize callouts. */
  	callout_startup();
  
+ 	/* Initialize the buffer cache */
+ 	bufinit();
+ 
  	/*
  	 * Initialize mbuf's.  Do this now because we might attempt to
  	 * allocate mbufs or mbuf clusters during autoconfiguration.
Index: kern/vfs_bio.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_bio.c,v
retrieving revision 1.97
diff -c -r1.97 vfs_bio.c
*** kern/vfs_bio.c	8 Nov 2003 04:22:35 -0000	1.97
--- kern/vfs_bio.c	29 Nov 2003 11:59:06 -0000
***************
*** 77,82 ****
--- 77,83 ----
   *		UNIX Operating System (Addison Welley, 1989)
   */
  
+ #include "opt_bufcache.h"
  #include "opt_softdep.h"
  
  #include <sys/cdefs.h>
***************
*** 84,89 ****
--- 85,91 ----
  
  #include <sys/param.h>
  #include <sys/systm.h>
+ #include <sys/kernel.h>
  #include <sys/proc.h>
  #include <sys/buf.h>
  #include <sys/vnode.h>
***************
*** 96,101 ****
--- 98,121 ----
  
  #include <miscfs/specfs/specdev.h>
  
+ #ifndef	BUFPAGES
+ # define BUFPAGES 0
+ #endif
+ 
+ #ifdef BUFCACHE
+ # if (BUFCACHE < 5) || (BUFCACHE > 95)
+ #  error BUFCACHE is not between 5 and 95
+ # endif
+ #else
+   /* Default to 10% of first 2MB and 5% of remaining. */
+ # define BUFCACHE 0
+ #endif
+ 
+ u_int	nbuf = 0;		/* XXX - for softdep_lockedbufs */
+ u_int	bufpages = BUFPAGES;	/* optional hardwired count */
+ u_int	bufcache = BUFCACHE;	/* % of RAM to use for buffer cache */
+ 
+ 
  /* Macros to clear/set/test flags. */
  #define	SET(t, f)	(t) |= (f)
  #define	CLR(t, f)	(t) &= ~(f)
***************
*** 142,147 ****
--- 162,175 ----
   */
  struct pool bufpool;
  
+ /* Small buffer memory pools */
+ static struct pool buf1k, buf2k;
+ 
+ /* Buffer memory management variables */
+ u_long bufmem_hiwater;
+ u_long bufmem_lowater;
+ u_long bufmem;
+ 
  /*
   * bread()/breadn() helper.
   */
***************
*** 187,226 ****
  void
  bufinit()
  {
- 	struct buf *bp;
  	struct bqueues *dp;
- 	u_int i, base, residual;
  
  	/*
! 	 * Initialize the buffer pool.  This pool is used for buffers
! 	 * which are strictly I/O control blocks, not buffer cache
! 	 * buffers.
  	 */
  	pool_init(&bufpool, sizeof(struct buf), 0, 0, 0, "bufpl", NULL);
  
  	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
  		TAILQ_INIT(dp);
! 	bufhashtbl = hashinit(nbuf, HASH_LIST, M_CACHE, M_WAITOK, &bufhash);
! 	base = bufpages / nbuf;
! 	residual = bufpages % nbuf;
! 	for (i = 0; i < nbuf; i++) {
! 		bp = &buf[i];
! 		memset((char *)bp, 0, sizeof(*bp));
! 		BUF_INIT(bp);
! 		bp->b_dev = NODEV;
! 		bp->b_vnbufs.le_next = NOLIST;
! 		bp->b_data = buffers + i * MAXBSIZE;
! 		if (i < residual)
! 			bp->b_bufsize = (base + 1) * PAGE_SIZE;
! 		else
! 			bp->b_bufsize = base * PAGE_SIZE;
! 		bp->b_flags = B_INVAL;
! 		dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
! 		binsheadfree(bp, dp);
! 		binshash(bp, &invalhash);
  	}
  }
  
  static __inline struct buf *
  bio_doread(vp, blkno, size, cred, async)
  	struct vnode *vp;
--- 215,307 ----
  void
  bufinit()
  {
  	struct bqueues *dp;
  
  	/*
! 	 * Initialize the buffer pools.
  	 */
  	pool_init(&bufpool, sizeof(struct buf), 0, 0, 0, "bufpl", NULL);
+ 	pool_init(&buf1k, 1024, 0, 0, 0, "buf1k", NULL);
+ 	pool_init(&buf2k, 2048, 0, 0, 0, "buf2k", NULL);
  
  	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
  		TAILQ_INIT(dp);
! 
! 	/*
! 	 * Determine how many buffers to allocate.
! 	 *
! 	 *	- If bufcache is specified, use that % of memory
! 	 *	  for the buffer cache.
! 	 *
! 	 *	- Otherwise, we default to the traditional BSD
! 	 *	  formula of 10% of the first 2MB and 5% of
! 	 *	  the remaining.
! 	 */
! 	if (bufpages == 0) {
! 		if (bufcache != 0) {
! 			if (bufcache < 5 || bufcache > 95)
! 				panic("bufcache is out of range (%d)",
! 				    bufcache);
! 			bufpages = physmem / 100 * bufcache;
! 		} else {
! 			if (physmem < btoc(2 * 1024 * 1024))
! 				bufpages = physmem / 10;
! 			else
! 				bufpages = (btoc(2 * 1024 * 1024) + physmem) /
! 				    20;
! 		}
  	}
+ 
+ 	nbuf = bufpages; /* XXX - for softdep_lockedbufs */
+ 
+ 	/*
+ 	 * Estimate hash table size based on the amount of memory we
+ 	 * intend to use for the buffer cache and the distribution
+ 	 * of that memory among small and large buffers.
+ 	 * Assume that, on average, there will be about the same number
+ 	 * of small and large buffers on the queues. This assumption
+ 	 * seems ok for typical FFS filesystems, but not for NFS, which
+ 	 * mostly uses large buffers.
+ 	 */
+ 	bufhashtbl = hashinit(2 * bufpages, HASH_LIST, M_CACHE, M_WAITOK, &bufhash);
+ 
+ 	bufmem_hiwater = (bufpages + (bufpages >> 3)) << PAGE_SHIFT;
+ 	bufmem_lowater = (bufpages - (bufpages >> 3)) << PAGE_SHIFT;
+ 	bufmem = 0;
+ }
+ 
+ 
+ /*
+  * Buffer memory allocation helper functions
+  */
+ static __inline__ u_long buf_roundsize(u_long size)
+ {
+ 	return (size + 1023) & -1024;
+ }
+ 
+ static caddr_t buf_malloc(size_t size)
+ {
+ 	if (size <= 1024) {
+ 		return pool_get(&buf1k, PR_WAITOK);
+ 	} else if (size <= 2048) {
+ 		return pool_get(&buf2k, PR_WAITOK);
+ 	} else
+ 		return (caddr_t)uvm_km_alloc(kernel_map, size);
  }
  
+ static void buf_mrelease(caddr_t addr, size_t size)
+ {
+ 	if (size <= 1024) {
+ 		pool_put(&buf1k, addr);
+ 		return;
+ 	} else if (size <= 2048) {
+ 		pool_put(&buf2k, addr);
+ 		return;
+ 	} else
+ 		uvm_km_free(kernel_map, (vaddr_t)addr, size);
+ }
+ 
+ 
  static __inline struct buf *
  bio_doread(vp, blkno, size, cred, async)
  	struct vnode *vp;
***************
*** 596,602 ****
  		}
  		if (bp->b_bufsize <= 0)
  			/* no data */
! 			bufq = &bufqueues[BQ_EMPTY];
  		else
  			/* invalid data */
  			bufq = &bufqueues[BQ_AGE];
--- 677,683 ----
  		}
  		if (bp->b_bufsize <= 0)
  			/* no data */
! 			goto already_queued;
  		else
  			/* invalid data */
  			bufq = &bufqueues[BQ_AGE];
***************
*** 639,644 ****
--- 720,731 ----
  	/* Allow disk interrupts. */
  	simple_unlock(&bp->b_interlock);
  	simple_unlock(&bqueue_slock);
+ 	if (bp->b_bufsize <= 0) {
+ #ifdef DEBUG
+ 		memset((char *)bp, 0, sizeof(*bp));
+ #endif
+ 		pool_put(&bufpool, bp);
+ 	}
  	splx(s);
  }
  
***************
*** 682,687 ****
--- 769,775 ----
  {
  	struct buf *bp;
  	int s, err;
+ 	int preserve;
  
  start:
  	s = splbio();
***************
*** 711,718 ****
  #endif
  		SET(bp->b_flags, B_BUSY);
  		bremfree(bp);
  	} else {
! 		if ((bp = getnewbuf(slpflag, slptimeo)) == NULL) {
  			simple_unlock(&bqueue_slock);
  			splx(s);
  			goto start;
--- 799,807 ----
  #endif
  		SET(bp->b_flags, B_BUSY);
  		bremfree(bp);
+ 		preserve = 1;
  	} else {
! 		if ((bp = getnewbuf(slpflag, slptimeo, 0)) == NULL) {
  			simple_unlock(&bqueue_slock);
  			splx(s);
  			goto start;
***************
*** 721,726 ****
--- 810,816 ----
  		binshash(bp, BUFHASH(vp, blkno));
  		bp->b_blkno = bp->b_lblkno = bp->b_rawblkno = blkno;
  		bgetvp(vp, bp);
+ 		preserve = 0;
  	}
  	simple_unlock(&bp->b_interlock);
  	simple_unlock(&bqueue_slock);
***************
*** 732,738 ****
  	if (ISSET(bp->b_flags, B_LOCKED)) {
  		KASSERT(bp->b_bufsize >= size);
  	} else {
! 		allocbuf(bp, size);
  	}
  	return (bp);
  }
--- 822,828 ----
  	if (ISSET(bp->b_flags, B_LOCKED)) {
  		KASSERT(bp->b_bufsize >= size);
  	} else {
! 		allocbuf(bp, size, preserve);
  	}
  	return (bp);
  }
***************
*** 749,755 ****
  
  	s = splbio();
  	simple_lock(&bqueue_slock);
! 	while ((bp = getnewbuf(0, 0)) == 0)
  		;
  
  	SET(bp->b_flags, B_INVAL);
--- 839,845 ----
  
  	s = splbio();
  	simple_lock(&bqueue_slock);
! 	while ((bp = getnewbuf(0, 0, 0)) == 0)
  		;
  
  	SET(bp->b_flags, B_INVAL);
***************
*** 757,763 ****
  	simple_unlock(&bqueue_slock);
  	simple_unlock(&bp->b_interlock);
  	splx(s);
! 	allocbuf(bp, size);
  	return (bp);
  }
  
--- 847,853 ----
  	simple_unlock(&bqueue_slock);
  	simple_unlock(&bp->b_interlock);
  	splx(s);
! 	allocbuf(bp, size, 0);
  	return (bp);
  }
  
***************
*** 770,863 ****
   * responsibility to fill out the buffer's additional contents.
   */
  void
! allocbuf(bp, size)
  	struct buf *bp;
  	int size;
  {
! 	struct buf *nbp;
! 	vsize_t desired_size;
! 	int s;
  
! 	desired_size = round_page((vsize_t)size);
  	if (desired_size > MAXBSIZE)
! 		panic("allocbuf: buffer larger than MAXBSIZE requested");
  
! 	if (bp->b_bufsize == desired_size)
! 		goto out;
  
  	/*
! 	 * If the buffer is smaller than the desired size, we need to snarf
! 	 * it from other buffers.  Get buffers (via getnewbuf()), and
! 	 * steal their pages.
  	 */
! 	while (bp->b_bufsize < desired_size) {
! 		int amt;
! 
! 		/* find a buffer */
! 		s = splbio();
! 		simple_lock(&bqueue_slock);
! 		while ((nbp = getnewbuf(0, 0)) == NULL)
! 			;
! 
! 		SET(nbp->b_flags, B_INVAL);
! 		binshash(nbp, &invalhash);
  
! 		simple_unlock(&nbp->b_interlock);
! 		simple_unlock(&bqueue_slock);
! 		splx(s);
! 
! 		/* and steal its pages, up to the amount we need */
! 		amt = min(nbp->b_bufsize, (desired_size - bp->b_bufsize));
! 		pagemove((nbp->b_data + nbp->b_bufsize - amt),
! 			 bp->b_data + bp->b_bufsize, amt);
! 		bp->b_bufsize += amt;
! 		nbp->b_bufsize -= amt;
! 
! 		/* reduce transfer count if we stole some data */
! 		if (nbp->b_bcount > nbp->b_bufsize)
! 			nbp->b_bcount = nbp->b_bufsize;
  
! #ifdef DIAGNOSTIC
! 		if (nbp->b_bufsize < 0)
! 			panic("allocbuf: negative bufsize");
! #endif
! 		brelse(nbp);
! 	}
  
  	/*
! 	 * If we want a buffer smaller than the current size,
! 	 * shrink this buffer.  Grab a buf head from the EMPTY queue,
! 	 * move a page onto it, and put it on front of the AGE queue.
! 	 * If there are no free buffer headers, leave the buffer alone.
  	 */
! 	if (bp->b_bufsize > desired_size) {
! 		s = splbio();
! 		simple_lock(&bqueue_slock);
! 		if ((nbp = TAILQ_FIRST(&bufqueues[BQ_EMPTY])) == NULL) {
! 			/* No free buffer head */
  			simple_unlock(&bqueue_slock);
! 			splx(s);
! 			goto out;
  		}
! 		/* No need to lock nbp since it came from the empty queue */
! 		bremfree(nbp);
! 		SET(nbp->b_flags, B_BUSY | B_INVAL);
  		simple_unlock(&bqueue_slock);
! 		splx(s);
! 
! 		/* move the page to it and note this change */
! 		pagemove(bp->b_data + desired_size,
! 		    nbp->b_data, bp->b_bufsize - desired_size);
! 		nbp->b_bufsize = bp->b_bufsize - desired_size;
! 		bp->b_bufsize = desired_size;
! 		nbp->b_bcount = 0;
! 
! 		/* release the newly-filled buffer and leave */
! 		brelse(nbp);
  	}
  
  out:
! 	bp->b_bcount = size;
  }
  
  /*
--- 860,942 ----
   * responsibility to fill out the buffer's additional contents.
   */
  void
! allocbuf(bp, size, preserve)
  	struct buf *bp;
  	int size;
+ 	int preserve;
  {
! 	vsize_t oldsize, desired_size;
! 	caddr_t addr;
! 	int s, delta;
  
! 	desired_size = buf_roundsize(size);
  	if (desired_size > MAXBSIZE)
! 		printf("allocbuf: buffer larger than MAXBSIZE requested\n");
  
! 	bp->b_bcount = size;
! 
! 	oldsize = bp->b_bufsize;
! 	if (oldsize == desired_size)
! 		return;
  
  	/*
! 	 * If we want a buffer of a different size, re-allocate the
! 	 * buffer's memory; copy old content only if needed.
  	 */
! 	addr = buf_malloc(desired_size);
! 	if (preserve)
! 		memcpy(addr, bp->b_data, MIN(oldsize,desired_size));
! 	if (bp->b_data != NULL)
! 		buf_mrelease(bp->b_data, oldsize);
! 	bp->b_data = addr;
! 	bp->b_bufsize = desired_size;
  
! 	/*
! 	 * Update overall buffer memory counter (protected by bqueue_slock)
! 	 */
! 	delta = (long)desired_size - (long)oldsize;
  
! 	s = splbio();
! 	simple_lock(&bqueue_slock);
! 	if ((bufmem += delta) < bufmem_hiwater)
! 		goto out;
  
  	/*
! 	 * Need to trim overall memory usage.
  	 */
! 	while (bufmem > bufmem_lowater) {
! 		long size;
! 		int wanted;
! 
! 		/* Instruct getnewbuf() to get buffers off the queues */
! 		if ((bp = getnewbuf(PCATCH,1,1)) == NULL)
! 			break;
! 		wanted = ISSET(bp->b_flags, B_WANTED);
! 		simple_unlock(&bp->b_interlock);
! 		if (wanted) {
! 			printf("buftrim: got WANTED buffer\n");
! 			SET(bp->b_flags, B_INVAL);
! 			binshash(bp, &invalhash);
  			simple_unlock(&bqueue_slock);
! 			brelse(bp);
! 			simple_lock(&bqueue_slock);
! 			break;
  		}
! 		size = bp->b_bufsize;
! 		bufmem -= size;
  		simple_unlock(&bqueue_slock);
! 		if (size > 0) {
! 			buf_mrelease(bp->b_data, size);
! 			bp->b_bcount = bp->b_bufsize = 0;
! 		}
! 		/* brelse() will return the buffer to the global buffer pool */
! 		brelse(bp);
! 		simple_lock(&bqueue_slock);
  	}
  
  out:
! 	simple_unlock(&bqueue_slock);
! 	splx(s);
  }
  
  /*
***************
*** 869,882 ****
   * Return buffer locked.
   */
  struct buf *
! getnewbuf(slpflag, slptimeo)
! 	int slpflag, slptimeo;
  {
  	struct buf *bp;
  
  start:
  	LOCK_ASSERT(simple_lock_held(&bqueue_slock));
  
  	if ((bp = TAILQ_FIRST(&bufqueues[BQ_AGE])) != NULL ||
  	    (bp = TAILQ_FIRST(&bufqueues[BQ_LRU])) != NULL) {
  		simple_lock(&bp->b_interlock);
--- 948,975 ----
   * Return buffer locked.
   */
  struct buf *
! getnewbuf(slpflag, slptimeo, from_bufq)
! 	int slpflag, slptimeo, from_bufq;
  {
  	struct buf *bp;
  
  start:
  	LOCK_ASSERT(simple_lock_held(&bqueue_slock));
  
+ 	/*
+ 	 * Get a new buffer from the pool, but use PR_NOWAIT because
+ 	 * we hold the buffer queue lock.
+ 	 */
+ 	if (bufmem < bufmem_hiwater && !from_bufq &&
+ 	    (bp = pool_get(&bufpool, PR_NOWAIT)) != NULL) {
+ 		memset((char *)bp, 0, sizeof(*bp));
+ 		BUF_INIT(bp);
+ 		bp->b_dev = NODEV;
+ 		bp->b_vnbufs.le_next = NOLIST;
+ 		bp->b_flags = B_BUSY;
+ 		return (bp);
+ 	}
+ 
  	if ((bp = TAILQ_FIRST(&bufqueues[BQ_AGE])) != NULL ||
  	    (bp = TAILQ_FIRST(&bufqueues[BQ_LRU])) != NULL) {
  		simple_lock(&bp->b_interlock);
***************
*** 889,894 ****
--- 982,990 ----
  		return (NULL);
  	}
  
+ 	if (bp->b_bufsize <= 0)
+ 		printf("buffer %p: on queue but no mem\n", bp);
+ 
  	if (ISSET(bp->b_flags, B_VFLUSH)) {
  		/*
  		 * This is a delayed write buffer being flushed to disk.  Make
***************
*** 1040,1045 ****
--- 1136,1277 ----
  	simple_unlock(&bqueue_slock);
  	return (n);
  }
+ 
+ /*
+  * Wait for all buffers to complete I/O.
+  * Return the number of "stuck" buffers.
+  */
+ int
+ buf_syncwait(void)
+ {
+ 	struct buf *bp;
+ 	int iter, nbusy, nbusy_prev = 0, dcount, s, ihash;
+ 
+ 	dcount = 10000;
+ 	for (iter = 0; iter < 20;) {
+ 		s = splbio();
+ 		simple_lock(&bqueue_slock);
+ 		nbusy = 0;
+ 		for (ihash = 0; ihash < bufhash+1; ihash++) {
+ 		    LIST_FOREACH(bp, &bufhashtbl[ihash], b_hash) {
+ 			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
+ 				nbusy++;
+ 			/*
+ 			 * With soft updates, some buffers that are
+ 			 * written will be remarked as dirty until other
+ 			 * buffers are written.
+ 			 */
+ 			if (bp->b_vp && bp->b_vp->v_mount
+ 			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
+ 			    && (bp->b_flags & B_DELWRI)) {
+ 				simple_lock(&bp->b_interlock);
+ 				bremfree(bp);
+ 				bp->b_flags |= B_BUSY;
+ 				nbusy++;
+ 				simple_unlock(&bp->b_interlock);
+ 				simple_unlock(&bqueue_slock);
+ 				bawrite(bp);
+ 				if (dcount-- <= 0) {
+ 					printf("softdep ");
+ 					goto fail;
+ 				}
+ 				simple_lock(&bqueue_slock);
+ 			}
+ 		    }
+ 		}
+ 
+ 		simple_unlock(&bqueue_slock);
+ 		splx(s);
+ 
+ 		if (nbusy == 0)
+ 			break;
+ 		if (nbusy_prev == 0)
+ 			nbusy_prev = nbusy;
+ 		printf("%d ", nbusy);
+ 		tsleep(&nbusy, PRIBIO, "bflush",
+ 		    (iter == 0) ? 1 : hz / 25 * iter);
+ 		if (nbusy >= nbusy_prev) /* we didn't flush anything */
+ 			iter++;
+ 		else
+ 			nbusy_prev = nbusy;
+ 	}
+ 
+ 	if (nbusy) {
+ fail:;
+ #if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
+ 		printf("giving up\nPrinting vnodes for busy buffers\n");
+ 		for (ihash = 0; ihash < bufhash+1; ihash++) {
+ 		    LIST_FOREACH(bp, &bufhashtbl[ihash], b_hash) {
+ 			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
+ 				vprint(NULL, bp->b_vp);
+ 		    }
+ 		}
+ #endif
+ 	}
+ 
+ 	return nbusy;
+ }
+ 
+ #if notyet
+ int sysctl_dobuf(int *, u_int, void *, size_t *);
+ #define KERN_BUFSLOP 10
+ int
+ sysctl_dobuf(int *name, u_int namelen, void *where, size_t *sizep)
+ {
+ 	struct buf *bp;
+ 	char *dp;
+ 	u_int i, elem_size;
+ 	size_t len, buflen, needed;
+ 	int error, s;
+ 
+ 	dp = where;
+ 	len = buflen = where != NULL ? *sizep : 0;
+ 	error = 0;
+ 	needed = 0;
+ 	elem_size = sizeof(struct buf);
+ 
+ 	PHOLD(curlwp);
+ 	if (buflen != 0) {
+ 		/* Lock user buffer */
+ 		error = uvm_vslock(curproc, where, buflen, VM_PROT_WRITE);
+ 		if (error)
+ 			goto out;
+ 	}
+ 
+ 	s = splbio();
+ 	simple_lock(&bqueue_slock);
+ 	for (i = 0; i < BQUEUES; i++) {
+ 		TAILQ_FOREACH(bp, &bufqueues[i], b_freelist) {
+ 			if (len >= elem_size) {
+ 				error = copyout(bp, dp, elem_size);
+ 				if (error)
+ 					goto cleanup;
+ 				dp += elem_size;
+ 				len -= elem_size;
+ 			}
+ 			needed += elem_size;
+ 		}
+ 	}
+ cleanup:
+ 	simple_unlock(&bqueue_slock);
+ 	splx(s);
+ 	if (buflen)
+ 		uvm_vsunlock(curproc, where, buflen);
+ 
+ 	if (where != NULL) {
+ 		*sizep = (char *)dp - (char *)where;
+ 		if (needed > *sizep)
+ 			error = ENOMEM;
+ 	} else {
+ 		needed += KERN_BUFSLOP;
+ 		*sizep = needed;
+ 	}
+ 
+ out:
+ 	PRELE(curlwp);
+ 	return (error);
+ }
+ #endif
  
  #ifdef DEBUG
  /*
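For reviewers who don't want to wade through the hunks above, the sizing
policy that allocbuf() now enforces boils down to the sketch below. This
is illustrative only and not part of the patch; buf_trim_one() is a
made-up stand-in for the getnewbuf(.., from_bufq = 1) / buf_mrelease() /
brelse() sequence in the real code, and the splbio()/bqueue_slock
handling is omitted.

	/* Illustrative sketch only -- not part of the patch. */
	static long bufmem, bufmem_hiwater, bufmem_lowater;

	/*
	 * Hypothetical stand-in for one pass of the real reclaim code:
	 * pull a buffer off the free queues, release its memory and
	 * return the number of bytes freed, or 0 if nothing was found.
	 */
	static long buf_trim_one(void);

	static void
	bufmem_adjust(long delta)
	{
		bufmem += delta;		/* account for the resize */
		if (bufmem < bufmem_hiwater)
			return;			/* under the high watermark */

		/* Over the high watermark: trim back to the low one. */
		while (bufmem > bufmem_lowater) {
			long freed = buf_trim_one();

			if (freed <= 0)
				break;		/* nothing reclaimable */
			bufmem -= freed;
		}
	}

The from_bufq argument to getnewbuf() exists precisely so that this trim
loop can insist on recycling a queued buffer instead of having a fresh
header handed out from bufpool.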
Index: kern/vfs_subr.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_subr.c,v
retrieving revision 1.210
diff -c -r1.210 vfs_subr.c
*** kern/vfs_subr.c	18 Nov 2003 18:26:18 -0000	1.210
--- kern/vfs_subr.c	29 Nov 2003 11:59:06 -0000
***************
*** 2600,2607 ****
  void
  vfs_shutdown()
  {
- 	struct buf *bp;
- 	int iter, nbusy, nbusy_prev = 0, dcount, s;
  	struct lwp *l = curlwp;
  	struct proc *p;
  
--- 2600,2605 ----
***************
*** 2621,2681 ****
  	sys_sync(l, NULL, NULL);
  
  	/* Wait for sync to finish. */
! 	dcount = 10000;
! 	for (iter = 0; iter < 20;) {
! 		nbusy = 0;
! 		for (bp = &buf[nbuf]; --bp >= buf; ) {
! 			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
! 				nbusy++;
! 			/*
! 			 * With soft updates, some buffers that are
! 			 * written will be remarked as dirty until other
! 			 * buffers are written.
! 			 */
! 			if (bp->b_vp && bp->b_vp->v_mount
! 			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
! 			    && (bp->b_flags & B_DELWRI)) {
! 				s = splbio();
! 				simple_lock(&bqueue_slock);
! 				bremfree(bp);
! 				simple_unlock(&bqueue_slock);
! 				bp->b_flags |= B_BUSY;
! 				splx(s);
! 				nbusy++;
! 				bawrite(bp);
! 				if (dcount-- <= 0) {
! 					printf("softdep ");
! 					goto fail;
! 				}
! 			}
! 		}
! 		if (nbusy == 0)
! 			break;
! 		if (nbusy_prev == 0)
! 			nbusy_prev = nbusy;
! 		printf("%d ", nbusy);
! 		tsleep(&nbusy, PRIBIO, "bflush",
! 		    (iter == 0) ? 1 : hz / 25 * iter);
! 		if (nbusy >= nbusy_prev) /* we didn't flush anything */
! 			iter++;
! 		else
! 			nbusy_prev = nbusy;
! 	}
! 	if (nbusy) {
! fail:
! #if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
! 		printf("giving up\nPrinting vnodes for busy buffers\n");
! 		for (bp = &buf[nbuf]; --bp >= buf; )
! 			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
! 				vprint(NULL, bp->b_vp);
! 
  #if defined(DDB) && defined(DEBUG_HALT_BUSY)
  		Debugger();
  #endif
- 
- #else  /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
  		printf("giving up\n");
- #endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
  		return;
  	} else
  		printf("done\n");
--- 2619,2629 ----
  	sys_sync(l, NULL, NULL);
  
  	/* Wait for sync to finish. */
! 	if (buf_syncwait() != 0) {
  #if defined(DDB) && defined(DEBUG_HALT_BUSY)
  		Debugger();
  #endif
  		printf("giving up\n");
  		return;
  	} else
  		printf("done\n");
Index: sys/buf.h
===================================================================
RCS file: /cvsroot/src/sys/sys/buf.h,v
retrieving revision 1.64
diff -c -r1.64 buf.h
*** sys/buf.h	7 Aug 2003 16:33:59 -0000	1.64
--- sys/buf.h	29 Nov 2003 11:59:07 -0000
***************
*** 263,269 ****
  extern	struct pool bufpool;
  
  __BEGIN_DECLS
! void	allocbuf __P((struct buf *, int));
  void	bawrite __P((struct buf *));
  void	bdirty __P((struct buf *));
  void	bdwrite __P((struct buf *));
--- 263,269 ----
  extern	struct pool bufpool;
  
  __BEGIN_DECLS
! void	allocbuf __P((struct buf *, int, int));
  void	bawrite __P((struct buf *));
  void	bdirty __P((struct buf *));
  void	bdwrite __P((struct buf *));
***************
*** 285,291 ****
  void	cluster_write __P((struct buf *, u_quad_t));
  struct buf *getblk __P((struct vnode *, daddr_t, int, int, int));
  struct buf *geteblk __P((int));
! struct buf *getnewbuf __P((int, int));
  struct buf *incore __P((struct vnode *, daddr_t));
  
  void	minphys __P((struct buf *));
--- 285,291 ----
  void	cluster_write __P((struct buf *, u_quad_t));
  struct buf *getblk __P((struct vnode *, daddr_t, int, int, int));
  struct buf *geteblk __P((int));
! struct buf *getnewbuf __P((int, int, int));
  struct buf *incore __P((struct vnode *, daddr_t));
  
  void	minphys __P((struct buf *));
***************
*** 295,300 ****
--- 295,301 ----
  void  brelvp __P((struct buf *));
  void  reassignbuf __P((struct buf *, struct vnode *));
  void  bgetvp __P((struct vnode *, struct buf *));
+ int   buf_syncwait __P((void));
  #ifdef DDB
  void	vfs_buf_print __P((struct buf *, int, void (*)(const char *, ...)));
  #endif
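The extra argument to allocbuf() says whether the buffer's current
contents still matter: pass 1 to have up to MIN(old, new) bytes copied
into the re-allocated memory, 0 if the caller is about to overwrite the
whole buffer anyway and the memcpy() can be skipped. A hypothetical call
site (the "growing" flag is made up for illustration):

	if (growing)
		allocbuf(bp, nsize, 1);	/* keep the existing data */
	else
		allocbuf(bp, nsize, 0);	/* old contents are irrelevant */

The filesystem changes below all pass 1, since they resize a block in
place and still need the data already in the buffer.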
Index: ufs/ffs/ffs_alloc.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ffs/ffs_alloc.c,v
retrieving revision 1.71
diff -c -r1.71 ffs_alloc.c
*** ufs/ffs/ffs_alloc.c	27 Nov 2003 04:52:55 -0000	1.71
--- ufs/ffs/ffs_alloc.c	29 Nov 2003 11:59:07 -0000
***************
*** 285,291 ****
  		if (bpp != NULL) {
  			if (bp->b_blkno != fsbtodb(fs, bno))
  				panic("bad blockno");
! 			allocbuf(bp, nsize);
  			bp->b_flags |= B_DONE;
  			memset(bp->b_data + osize, 0, nsize - osize);
  			*bpp = bp;
--- 285,291 ----
  		if (bpp != NULL) {
  			if (bp->b_blkno != fsbtodb(fs, bno))
  				panic("bad blockno");
! 			allocbuf(bp, nsize, 1);
  			bp->b_flags |= B_DONE;
  			memset(bp->b_data + osize, 0, nsize - osize);
  			*bpp = bp;
***************
*** 364,370 ****
  		ip->i_flag |= IN_CHANGE | IN_UPDATE;
  		if (bpp != NULL) {
  			bp->b_blkno = fsbtodb(fs, bno);
! 			allocbuf(bp, nsize);
  			bp->b_flags |= B_DONE;
  			memset(bp->b_data + osize, 0, (u_int)nsize - osize);
  			*bpp = bp;
--- 364,370 ----
  		ip->i_flag |= IN_CHANGE | IN_UPDATE;
  		if (bpp != NULL) {
  			bp->b_blkno = fsbtodb(fs, bno);
! 			allocbuf(bp, nsize, 1);
  			bp->b_flags |= B_DONE;
  			memset(bp->b_data + osize, 0, (u_int)nsize - osize);
  			*bpp = bp;
Index: ufs/ffs/ffs_subr.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ffs/ffs_subr.c,v
retrieving revision 1.30
diff -c -r1.30 ffs_subr.c
*** ufs/ffs/ffs_subr.c	27 Oct 2003 00:12:42 -0000	1.30
--- ufs/ffs/ffs_subr.c	29 Nov 2003 11:59:07 -0000
***************
*** 199,204 ****
--- 199,205 ----
  	struct buf *bp;
  	struct inode *ip;
  {
+ #if 0
  	struct buf *ebp, *ep;
  	daddr_t start, last;
  	struct vnode *vp;
***************
*** 225,230 ****
--- 226,234 ----
  		    ep->b_blkno + btodb(ep->b_bcount) - 1);
  		panic("Disk buffer overlap");
  	}
+ #else
+ 	printf("ffs_checkoverlap disabled due to buffer cache implementation changes\n");
+ #endif
  }
  #endif /* _KERNEL && DIAGNOSTIC */
  
Index: ufs/lfs/lfs_balloc.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/lfs/lfs_balloc.c,v
retrieving revision 1.46
diff -c -r1.46 lfs_balloc.c
*** ufs/lfs/lfs_balloc.c	29 Oct 2003 01:25:04 -0000	1.46
--- ufs/lfs/lfs_balloc.c	29 Nov 2003 11:59:07 -0000
***************
*** 429,435 ****
  
  	if (bpp) {
  		obufsize = (*bpp)->b_bufsize;
! 		allocbuf(*bpp, nsize);
  
  		/* Adjust locked-list accounting */
  		if (((*bpp)->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED)
--- 429,435 ----
  
  	if (bpp) {
  		obufsize = (*bpp)->b_bufsize;
! 		allocbuf(*bpp, nsize, 1);
  
  		/* Adjust locked-list accounting */
  		if (((*bpp)->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED)
Index: ufs/lfs/lfs_inode.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/lfs/lfs_inode.c,v
retrieving revision 1.80
diff -c -r1.80 lfs_inode.c
*** ufs/lfs/lfs_inode.c	7 Nov 2003 14:48:28 -0000	1.80
--- ufs/lfs/lfs_inode.c	29 Nov 2003 11:59:08 -0000
***************
*** 379,385 ****
  		if (ovp->v_type != VDIR)
  			memset((char *)bp->b_data + offset, 0,
  			       (u_int)(size - offset));
! 		allocbuf(bp, size);
  		if ((bp->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED)
  			locked_queue_bytes -= obufsize - bp->b_bufsize;
  		if (bp->b_flags & B_DELWRI)
--- 379,385 ----
  		if (ovp->v_type != VDIR)
  			memset((char *)bp->b_data + offset, 0,
  			       (u_int)(size - offset));
! 		allocbuf(bp, size, 1);
  		if ((bp->b_flags & (B_LOCKED | B_CALL)) == B_LOCKED)
  			locked_queue_bytes -= obufsize - bp->b_bufsize;
  		if (bp->b_flags & B_DELWRI)
Index: uvm/uvm_glue.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_glue.c,v
retrieving revision 1.73
diff -c -r1.73 uvm_glue.c
*** uvm/uvm_glue.c	13 Nov 2003 03:09:30 -0000	1.73
--- uvm/uvm_glue.c	29 Nov 2003 11:59:08 -0000
***************
*** 103,112 ****
   * XXXCDC: do these really belong here?
   */
  
- int readbuffers = 0;		/* allow KGDB to read kern buffer pool */
- 				/* XXX: see uvm_kernacc */
- 
- 
  /*
   * uvm_kernacc: can the kernel access a region of memory
   *
--- 103,108 ----
***************
*** 129,146 ****
  	rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot);
  	vm_map_unlock_read(kernel_map);
  
- 	/*
- 	 * XXX there are still some things (e.g. the buffer cache) that
- 	 * are managed behind the VM system's back so even though an
- 	 * address is accessible in the mind of the VM system, there may
- 	 * not be physical pages where the VM thinks there is.  This can
- 	 * lead to bogus allocation of pages in the kernel address space
- 	 * or worse, inconsistencies at the pmap level.  We only worry
- 	 * about the buffer cache for now.
- 	 */
- 	if (!readbuffers && rv && (eaddr > (vaddr_t)buffers &&
- 			     saddr < (vaddr_t)buffers + MAXBSIZE * nbuf))
- 		rv = FALSE;
  	return(rv);
  }
  
--- 125,130 ----
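With the fixed buffers[] window gone, there is no longer any kernel VA
range that the buffer cache manages behind the VM system's back, so the
special case in uvm_kernacc() (and the readbuffers knob that went with
it) can simply be dropped. What remains is just the map protection
check; roughly (reconstructed sketch, not copied from the patched file):

	vm_map_lock_read(kernel_map);
	rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot);
	vm_map_unlock_read(kernel_map);

	return (rv);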