Subject: Re: Using different cache modes for r/o vs r/w pages
To: None <port-arm@netbsd.org>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: port-arm
Date: 01/31/2002 11:40:49
--WYTEVAkct0FjGQmd
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Wed, Jan 30, 2002 at 06:27:29PM -0800, Jason R Thorpe wrote:

 > It's currently not as efficient as it could be.  I'm going to clean
 > up the mod/ref stuff in pmap_enter() to make it a bit faster, and
 > use __HAVE_VM_PAGE_MD to speed up mod/ref emulation a bit.  In
 > particular, things like the page's cacheable attribute belong in
 > flags in the pv_head (or, vm_page_md), not copied into every individual
 > pv_entry.
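
Roughly what I have in mind for that (a sketch only -- the names
here are illustrative, not from the diff below):

	/*
	 * Keep per-page attributes, including the cacheable state,
	 * once in the pv_head (or vm_page_md), instead of copying
	 * them into every pv_entry on the page's pv list.
	 */
	struct vm_page_md {
		struct pv_entry *pvh_list;	/* pv_entry chain */
		int pvh_attrs;			/* page attributes */
	};

	#define	PVH_ATTR_NC	0x01		/* page is un-cached */

	/* One test per page, rather than a walk of the pv list: */
	#define	pmap_page_is_nc(md) \
		(((md)->pvh_attrs & PVH_ATTR_NC) != 0)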

Ok, I haven't addressed these issues yet, but I have made all PTE
construction use boot-time-initialized prototypes.  I think the code
is a lot cleaner now.
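
The idea, in miniature: instead of assembling protection and cache
bits at every call site, we index tables that are filled in once at
boot for the CPU at hand.  The real tables and macros are in the diff
below; this helper is just the shape of it:

	/* [kernel/user + (cache << 1)] x [VM protection code] */
	pt_entry_t pte_protos[4][8];

	static __inline pt_entry_t
	pte_for(paddr_t pa, int ku, int prot, int cache)
	{

		return (pa | pte_protos[ku + (cache << 1)][prot]);
	}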

Here is my current diff (just arch/arm and the iq80310 stuff for
now -- I'll make sure the other ports compile today while I sit on
a plane headed for Phoenix, AZ).
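
For example, the bootstrap call sites go from encoding raw AP and
cache bits to passing a VM protection code plus a cache flag:

	/* before */
	map_chunk(0, l2pagetable, irqstack.pv_va, irqstack.pv_pa,
	    IRQ_STACK_SIZE * NBPG, AP_KRW, PT_CACHEABLE);

	/* after */
	pmap_map_chunk(0, l2pagetable, irqstack.pv_va, irqstack.pv_pa,
	    IRQ_STACK_SIZE * NBPG, VM_PROT_READ|VM_PROT_WRITE,
	    PTE_PROTO_CACHE);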

I still lose when going from write-back (WB) to write-through (WT)
caching, but I think that might be lossage in the mod/ref emulation
stuff.  I'm going to take my IQ80310 board with me while I'm gone this
weekend, so hopefully I'll be able to track it down.

-- 
        -- Jason R. Thorpe <thorpej@wasabisystems.com>

--WYTEVAkct0FjGQmd
Content-Type: text/plain; charset=us-ascii
Content-Description: pmap-cache-take4
Content-Disposition: attachment; filename=foo

Index: arm/arm/cpufunc.c
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/arm/arm/cpufunc.c,v
retrieving revision 1.29
diff -u -p -r1.29 cpufunc.c
--- arm/arm/cpufunc.c	2002/01/30 00:37:18	1.29
+++ arm/arm/cpufunc.c	2002/01/31 19:25:27
@@ -53,6 +53,9 @@
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/systm.h>
+
+#include <uvm/uvm_extern.h>
+
 #include <machine/cpu.h>
 #include <machine/bootconfig.h>
 #include <arch/arm/arm/disassem.h>
@@ -650,6 +653,7 @@ set_cpufuncs()
 		cpu_reset_needs_v4_MMU_disable = 0;
 		/* XXX Cache info? */
 		arm_dcache_align_mask = -1;
+		pmap_pte_protos_init_arm678();
 		return 0;
 	}
 #endif	/* CPU_ARM6 */
@@ -661,6 +665,7 @@ set_cpufuncs()
 		cpu_reset_needs_v4_MMU_disable = 0;
 		/* XXX Cache info? */
 		arm_dcache_align_mask = -1;
+		pmap_pte_protos_init_arm678();
 		return 0;
 	}
 #endif	/* CPU_ARM7 */
@@ -671,6 +676,7 @@ set_cpufuncs()
 		cpufuncs = arm7tdmi_cpufuncs;
 		cpu_reset_needs_v4_MMU_disable = 0;
 		get_cachetype();
+		pmap_pte_protos_init_arm678();
 		return 0;
 	}
 #endif	
@@ -680,15 +686,16 @@ set_cpufuncs()
 		cpufuncs = arm8_cpufuncs;
 		cpu_reset_needs_v4_MMU_disable = 0;	/* XXX correct? */
 		get_cachetype();
+		pmap_pte_protos_init_arm678();
 		return 0;
 	}
 #endif	/* CPU_ARM8 */
 #ifdef CPU_ARM9
 	if (cputype == CPU_ID_ARM920T) {
-		pte_cache_mode = PT_C;	/* Select write-through cacheing. */
 		cpufuncs = arm9_cpufuncs;
 		cpu_reset_needs_v4_MMU_disable = 1;	/* V4 or higher */
 		get_cachetype();
+		pmap_pte_protos_init_arm9();
 		return 0;
 	}
 #endif /* CPU_ARM9 */
@@ -698,6 +705,7 @@ set_cpufuncs()
 		cpufuncs = sa110_cpufuncs;
 		cpu_reset_needs_v4_MMU_disable = 1;	/* SA needs it */
 		get_cachetype();
+		pmap_pte_protos_init_arm678();		/* XXX */
 		/*
 		 * Enable the right variant of sleeping.
 		 */
@@ -743,7 +751,6 @@ set_cpufuncs()
 			:
 			: "r" (BCUCTL_E0|BCUCTL_E1|BCUCTL_EV));
 
-		pte_cache_mode = PT_C;	/* Select write-through cacheing. */
 		cpufuncs = xscale_cpufuncs;
 
 		/*
@@ -758,6 +765,7 @@ set_cpufuncs()
 
 		cpu_reset_needs_v4_MMU_disable = 1;	/* XScale needs it */
 		get_cachetype();
+		pmap_pte_protos_init_xscale();
 		return 0;
 	}
 #endif /* CPU_XSCALE */
Index: arm/arm32/arm32_machdep.c
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/arm/arm32/arm32_machdep.c,v
retrieving revision 1.11
diff -u -p -r1.11 arm32_machdep.c
--- arm/arm32/arm32_machdep.c	2002/01/20 03:41:47	1.11
+++ arm/arm32/arm32_machdep.c	2002/01/31 19:25:27
@@ -150,177 +150,6 @@ bootsync(void)
 }
 
 /*
- * A few functions that are used to help construct the page tables
- * during the bootstrap process.
- */
-
-void
-map_section(pagetable, va, pa, cacheable)
-	vaddr_t pagetable;
-	vaddr_t va;
-	paddr_t pa;
-	int cacheable;
-{
-#ifdef	DIAGNOSTIC
-	if (((va | pa) & (L1_SEC_SIZE - 1)) != 0)
-		panic("initarm: Cannot allocate 1MB section on non 1MB boundry\n");
-#endif	/* DIAGNOSTIC */
-
-	if (cacheable)
-		((u_int *)pagetable)[(va >> PDSHIFT)] =
-		    L1_SEC((pa & PD_MASK), pte_cache_mode);
-	else
-		((u_int *)pagetable)[(va >> PDSHIFT)] =
-		    L1_SEC((pa & PD_MASK), 0);
-}
-
-
-void
-map_pagetable(pagetable, va, pa)
-	vaddr_t pagetable;
-	vaddr_t va;
-	paddr_t pa;
-{
-#ifdef	DIAGNOSTIC
-	if ((pa & 0xc00) != 0)
-		panic("pagetables should be group allocated on pageboundry");
-#endif	/* DIAGNOSTIC */
-
-	((u_int *)pagetable)[(va >> PDSHIFT) + 0] =
-	     L1_PTE((pa & PG_FRAME) + 0x000);
-	((u_int *)pagetable)[(va >> PDSHIFT) + 1] =
-	     L1_PTE((pa & PG_FRAME) + 0x400);
-	((u_int *)pagetable)[(va >> PDSHIFT) + 2] =
-	     L1_PTE((pa & PG_FRAME) + 0x800);
-	((u_int *)pagetable)[(va >> PDSHIFT) + 3] =
-	     L1_PTE((pa & PG_FRAME) + 0xc00);
-}
-
-/* cats kernels have a 2nd l2 pt, so the range is bigger hence the 0x7ff etc */
-vsize_t
-map_chunk(pd, pt, va, pa, size, acc, flg)
-	vaddr_t pd;
-	vaddr_t pt;
-	vaddr_t va;
-	paddr_t pa;
-	vsize_t size;
-	u_int acc;
-	u_int flg;
-{
-	pd_entry_t *l1pt = (pd_entry_t *)pd;
-	pt_entry_t *l2pt = (pt_entry_t *)pt;
-	vsize_t remain;
-	u_int loop;
-
-	remain = (size + (NBPG - 1)) & ~(NBPG - 1);
-#ifdef VERBOSE_INIT_ARM
-	printf("map_chunk: pa=%lx va=%lx sz=%lx rem=%lx acc=%x flg=%x\n",
-	    pa, va, size, remain, acc, flg);
-	printf("map_chunk: ");
-#endif
-	size = remain;
-
-	while (remain > 0) {
-		/* Can we do a section mapping ? */
-		if (l1pt && !((pa | va) & (L1_SEC_SIZE - 1))
-		    && remain >= L1_SEC_SIZE) {
-#ifdef VERBOSE_INIT_ARM
-			printf("S");
-#endif
-			l1pt[(va >> PDSHIFT)] = L1_SECPTE(pa, acc, flg);
-			va += L1_SEC_SIZE;
-			pa += L1_SEC_SIZE;
-			remain -= L1_SEC_SIZE;
-		} else
-		/* Can we do a large page mapping ? */
-		if (!((pa | va) & (L2_LPAGE_SIZE - 1))
-		    && (remain >= L2_LPAGE_SIZE)) {
-#ifdef VERBOSE_INIT_ARM
-			printf("L");
-#endif
-			for (loop = 0; loop < 16; ++loop)
-#ifndef cats
-				l2pt[((va >> PGSHIFT) & 0x3f0) + loop] =
-				    L2_LPTE(pa, acc, flg);
-#else
-				l2pt[((va >> PGSHIFT) & 0x7f0) + loop] =
-				    L2_LPTE(pa, acc, flg);
-#endif	
-			va += L2_LPAGE_SIZE;
-			pa += L2_LPAGE_SIZE;
-			remain -= L2_LPAGE_SIZE;
-		} else
-		/* All we can do is a small page mapping */
-		{
-#ifdef VERBOSE_INIT_ARM
-			printf("P");
-#endif
-#ifndef cats			
-			l2pt[((va >> PGSHIFT) & 0x3ff)] = L2_SPTE(pa, acc, flg);
-#else
-			l2pt[((va >> PGSHIFT) & 0x7ff)] = L2_SPTE(pa, acc, flg);
-#endif
-			va += NBPG;
-			pa += NBPG;
-			remain -= NBPG;
-		}
-	}
-#ifdef VERBOSE_INIT_ARM
-	printf("\n");
-#endif
-	return(size);
-}
-
-/* cats versions have larger 2 l2pt's next to each other */
-void
-map_entry(pagetable, va, pa)
-	vaddr_t pagetable;
-	vaddr_t va;
-	paddr_t pa;
-{
-#ifndef cats
-	((pt_entry_t *)pagetable)[((va >> PGSHIFT) & 0x000003ff)] =
-	    L2_PTE((pa & PG_FRAME), AP_KRW);
-#else
-	((pt_entry_t *)pagetable)[((va >> PGSHIFT) & 0x000007ff)] =
-	    L2_PTE((pa & PG_FRAME), AP_KRW);
-#endif	
-}
-
-
-void
-map_entry_nc(pagetable, va, pa)
-	vaddr_t pagetable;
-	vaddr_t va;
-	paddr_t pa;
-{
-#ifndef cats
-	((pt_entry_t *)pagetable)[((va >> PGSHIFT) & 0x000003ff)] =
-	    L2_PTE_NC_NB((pa & PG_FRAME), AP_KRW);
-#else
-	((pt_entry_t *)pagetable)[((va >> PGSHIFT) & 0x000007ff)] =
-	    L2_PTE_NC_NB((pa & PG_FRAME), AP_KRW);
-#endif
-}
-
-
-void
-map_entry_ro(pagetable, va, pa)
-	vaddr_t pagetable;
-	vaddr_t va;
-	paddr_t pa;
-{
-#ifndef cats
-	((pt_entry_t *)pagetable)[((va >> PGSHIFT) & 0x000003ff)] =
-	    L2_PTE((pa & PG_FRAME), AP_KR);
-#else
-	((pt_entry_t *)pagetable)[((va >> PGSHIFT) & 0x000007ff)] =
-	    L2_PTE((pa & PG_FRAME), AP_KR);
-#endif
-}
-
-
-/*
  * void cpu_startup(void)
  *
  * Machine dependant startup code. 
@@ -486,8 +315,12 @@ cpu_startup()
 void
 zero_page_readonly()
 {
+
+	/* XXXJRT Do we really care about caching page0?! */
 	WriteWord(PROCESS_PAGE_TBLS_BASE + 0,
-	    L2_PTE((systempage.pv_pa & PG_FRAME), AP_KR));
+	    systempage.pv_pa | pte_proto(PTE_PROTO_KERNEL,
+					 VM_PROT_READ,
+					 PTE_PROTO_CACHE));
 	cpu_tlb_flushID_SE(0x00000000);
 }
 
@@ -502,8 +335,12 @@ zero_page_readonly()
 void
 zero_page_readwrite()
 {
+
+	/* XXXJRT See above. */
 	WriteWord(PROCESS_PAGE_TBLS_BASE + 0,
-	    L2_PTE((systempage.pv_pa & PG_FRAME), AP_KRW));
+	    systempage.pv_pa | pte_proto(PTE_PROTO_KERNEL,
+					 VM_PROT_READ|VM_PROT_WRITE,
+					 PTE_PROTO_CACHE));
 	cpu_tlb_flushID_SE(0x00000000);
 }
 
@@ -618,5 +455,3 @@ parse_mi_bootargs(args)
 		if (integer)
 			boothowto |= AB_VERBOSE;
 }
-
-/* End of machdep.c */
Index: arm/arm32/bus_dma.c
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/arm/arm32/bus_dma.c,v
retrieving revision 1.8
diff -u -p -r1.8 bus_dma.c
--- arm/arm32/bus_dma.c	2002/01/25 20:57:41	1.8
+++ arm/arm32/bus_dma.c	2002/01/31 19:25:28
@@ -547,7 +547,10 @@ _bus_dmamem_map(bus_dma_tag_t t, bus_dma
 				cpu_dcache_wbinv_range(va, NBPG);
 				cpu_drain_writebuf();
 				ptep = vtopte(va);
-				*ptep = ((*ptep) & (~PT_C | PT_B));
+				*ptep = (*ptep & PG_FRAME) |
+				    pmap_pte_proto(pmap_kernel(),
+						   VM_PROT_READ|VM_PROT_WRITE,
+						   PTE_PROTO_NOCACHE);
 				tlb_flush();
 			}
 #ifdef DEBUG_DMA
Index: arm/arm32/pmap.c
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/arm/arm32/pmap.c,v
retrieving revision 1.36
diff -u -p -r1.36 pmap.c
--- arm/arm32/pmap.c	2002/01/25 19:19:25	1.36
+++ arm/arm32/pmap.c	2002/01/31 19:25:31
@@ -193,6 +193,7 @@ pt_entry_t msgbufpte;
 extern caddr_t msgbufaddr;
 
 boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
+
 /*
  * locking data structures
  */
@@ -326,11 +327,13 @@ static void pmap_vac_me_user __P((struct
     pt_entry_t *, boolean_t));
 
 /*
- * Cache enable bits in PTE to use on pages that are cacheable.
- * On most machines this is cacheable/bufferable, but on some, eg arm10, we
- * can chose between write-through and write-back cacheing.
+ * Prototype PTE and L1 section descriptor arrays.  These are initialized
+ * in pmap_pte_protos_init_*().
  */
-pt_entry_t pte_cache_mode = (PT_C | PT_B);
+pt_entry_t pte_protos[4][8];	/* [kernel/user + (cache << 1)][prot] */
+pd_entry_t l1sec_protos[2][8];	/* [cache][prot] */
+pt_entry_t lpte_protos[2][8];	/* [cache][prot] */
+pd_entry_t pde_proto;		/* prototype L1 page table descriptor */
 
 /*
  * real definition of pv_entry.
@@ -933,22 +936,27 @@ pmap_map_in_l1(pmap, va, l2pa, selfref)
 	/* Calculate the index into the L1 page table. */
 	ptva = (va >> PDSHIFT) & ~3;
 
-	PDEBUG(0, printf("wiring %08lx in to pd%p pte0x%lx va0x%lx\n", l2pa,
-	    pmap->pm_pdir, L1_PTE(l2pa), ptva));
+	PDEBUG(0, printf("wiring %08lx in to pd=%p pte=0x%lx va=0x%lx\n",
+	    l2pa, pmap->pm_pdir, l2pa | pde_proto, ptva));
 
 	/* Map page table into the L1. */
-	pmap->pm_pdir[ptva + 0] = L1_PTE(l2pa + 0x000);
-	pmap->pm_pdir[ptva + 1] = L1_PTE(l2pa + 0x400);
-	pmap->pm_pdir[ptva + 2] = L1_PTE(l2pa + 0x800);
-	pmap->pm_pdir[ptva + 3] = L1_PTE(l2pa + 0xc00);
+	pmap->pm_pdir[ptva + 0] = (l2pa + 0x000) | pde_proto;
+	pmap->pm_pdir[ptva + 1] = (l2pa + 0x400) | pde_proto;
+	pmap->pm_pdir[ptva + 2] = (l2pa + 0x800) | pde_proto;
+	pmap->pm_pdir[ptva + 3] = (l2pa + 0xc00) | pde_proto;
 
 	PDEBUG(0, printf("pt self reference %lx in %lx\n",
-	    L2_PTE_NC_NB(l2pa, AP_KRW), pmap->pm_vptpt));
+	    l2pa | pmap_pte_proto(pmap_kernel(),
+				  VM_PROT_READ|VM_PROT_WRITE,
+				  PTE_PROTO_NOCACHE),
+			 pmap->pm_vptpt));
 
 	/* Map the page table into the page table area. */
 	if (selfref) {
 		*((pt_entry_t *)(pmap->pm_vptpt + ptva)) =
-			L2_PTE_NC_NB(l2pa, AP_KRW);
+		    l2pa | pmap_pte_proto(pmap_kernel(),
+					  VM_PROT_READ|VM_PROT_WRITE,
+					  PTE_PROTO_NOCACHE);
 	}
 	/* XXX should be a purge */
 /*	cpu_tlb_flushD();*/
@@ -1392,7 +1400,11 @@ pmap_alloc_l1pt(void)
 
 		/* Revoke cacheability and bufferability */
 		/* XXX should be done better than this */
-		ptes[arm_byte_to_page(va)] &= ~(PT_C | PT_B);
+		ptes[arm_byte_to_page(va)] =
+		    (ptes[arm_byte_to_page(va)] & PG_FRAME) |
+		    pmap_pte_proto(pmap_kernel(),
+				   VM_PROT_READ|VM_PROT_WRITE,
+				   PTE_PROTO_NOCACHE);
 
 		va += NBPG;
 		m = m->pageq.tqe_next;
@@ -1506,7 +1518,9 @@ pmap_allocpagedir(pmap)
 	/* Revoke cacheability and bufferability */
 	/* XXX should be done better than this */
 	pte = pmap_pte(pmap_kernel(), pmap->pm_vptpt);
-	*pte = *pte & ~(PT_C | PT_B);
+	*pte = (*pte & PG_FRAME) | pmap_pte_proto(pmap_kernel(),
+						  VM_PROT_READ|VM_PROT_WRITE,
+						  PTE_PROTO_NOCACHE);
 
 	/* Wire in this page table */
 	pmap_map_in_l1(pmap, PROCESS_PAGE_TBLS_BASE, pmap->pm_pptpt, TRUE);
@@ -1878,10 +1892,17 @@ pmap_zero_page(phys)
 	 * Hook in the page, zero it, and purge the cache for that
 	 * zeroed page. Invalidate the TLB as needed.
 	 */
-	*page_hook0.pte = L2_PTE(phys & PG_FRAME, AP_KRW);
+	KDASSERT((phys & PG_FRAME) == phys);
+	*page_hook0.pte = phys |
+	    pmap_pte_proto(pmap_kernel(),
+			   VM_PROT_READ|VM_PROT_WRITE,
+			   PTE_PROTO_CACHE);
 	cpu_tlb_flushD_SE(page_hook0.va);
+
 	cpu_cpwait();
+
 	bzero_page(page_hook0.va);
+
 	cpu_dcache_wbinv_range(page_hook0.va, NBPG);
 }
 
@@ -1910,7 +1931,11 @@ pmap_pageidlezero(phys)
 	 * Hook in the page, zero it, and purge the cache for that
 	 * zeroed page. Invalidate the TLB as needed.
 	 */
-	*page_hook0.pte = L2_PTE(phys & PG_FRAME, AP_KRW);
+	KDASSERT((phys & PG_FRAME) == phys);
+	*page_hook0.pte = phys |
+	    pmap_pte_proto(pmap_kernel(),
+			   VM_PROT_READ|VM_PROT_WRITE,
+			   PTE_PROTO_CACHE);
 	cpu_tlb_flushD_SE(page_hook0.va);
 	cpu_cpwait();
 
@@ -1971,12 +1996,24 @@ pmap_copy_page(src, dest)
 	 * the cache for the appropriate page. Invalidate the TLB
 	 * as required.
 	 */
-	*page_hook0.pte = L2_PTE(src & PG_FRAME, AP_KRW);
-	*page_hook1.pte = L2_PTE(dest & PG_FRAME, AP_KRW);
+	KDASSERT((src & PG_FRAME) == src);
+	*page_hook0.pte = src |		/* XXX should be r/o */
+	    pmap_pte_proto(pmap_kernel(),
+			   VM_PROT_READ|VM_PROT_WRITE,
+			   PTE_PROTO_CACHE);
 	cpu_tlb_flushD_SE(page_hook0.va);
+
+	KDASSERT((dest & PG_FRAME) == dest);
+	*page_hook1.pte = dest |
+	    pmap_pte_proto(pmap_kernel(),
+			   VM_PROT_READ|VM_PROT_WRITE,
+			   PTE_PROTO_CACHE);
 	cpu_tlb_flushD_SE(page_hook1.va);
+
 	cpu_cpwait();
+
 	bcopy_page(page_hook0.va, page_hook1.va);
+
 	cpu_dcache_wbinv_range(page_hook0.va, NBPG);
 	cpu_dcache_wbinv_range(page_hook1.va, NBPG);
 }
@@ -2194,6 +2231,7 @@ pmap_vac_me_user(struct pmap *pmap, stru
 	int cacheable_entries = 0;
 	int kern_cacheable = 0;
 	int other_writable = 0;
+	int prot;
 
 	pv = pvh->pvh_list;
 	KASSERT(ptes != NULL);
@@ -2237,12 +2275,12 @@ pmap_vac_me_user(struct pmap *pmap, stru
 		if (cacheable_entries == 0)
 		    return;
 		for (npv = pv; npv; npv = npv->pv_next) {
-			if ((pmap == npv->pv_pmap 
-			    || kpmap == npv->pv_pmap) && 
+			if ((pmap == npv->pv_pmap ||
+			     kpmap == npv->pv_pmap) && 
 			    (npv->pv_flags & PT_NC) == 0) {
-				ptes[arm_byte_to_page(npv->pv_va)] &= 
-				    ~(PT_C | PT_B);
- 				npv->pv_flags |= PT_NC;
+				prot = (npv->pv_flags & PT_Wr) ?
+				    VM_PROT_READ | VM_PROT_WRITE :
+				    VM_PROT_READ;
 				/*
 				 * If this page needs flushing from the
 				 * cache, and we aren't going to do it
@@ -2256,6 +2294,11 @@ pmap_vac_me_user(struct pmap *pmap, stru
 					    NBPG);
 					cpu_tlb_flushID_SE(npv->pv_va);
 				}
+				ptes[arm_byte_to_page(npv->pv_va)] =
+				    (ptes[arm_byte_to_page(npv->pv_va)] & PG_FRAME) |
+				    pmap_pte_proto(npv->pv_pmap, prot,
+						   PTE_PROTO_NOCACHE);
+ 				npv->pv_flags |= PT_NC;
 			}
 		}
 		if ((clear_cache && cacheable_entries >= 4) ||
@@ -2273,8 +2316,13 @@ pmap_vac_me_user(struct pmap *pmap, stru
 			if ((pmap == npv->pv_pmap ||
 			    (kpmap == npv->pv_pmap && other_writable == 0)) && 
 			    (npv->pv_flags & PT_NC)) {
-				ptes[arm_byte_to_page(npv->pv_va)] |=
-				    pte_cache_mode;
+				prot = (npv->pv_flags & PT_Wr) ?
+				    VM_PROT_READ | VM_PROT_WRITE :
+				    VM_PROT_READ;
+				ptes[arm_byte_to_page(npv->pv_va)] =
+				    (ptes[arm_byte_to_page(npv->pv_va)] & PG_FRAME) |
+				    pmap_pte_proto(npv->pv_pmap, prot,
+						   PTE_PROTO_CACHE);
 				npv->pv_flags &= ~PT_NC;
 			}
 		}
@@ -2818,33 +2866,46 @@ pmap_enter(pmap, va, pa, prot, flags)
 #endif
 
 	/* Construct the pte, giving the correct access. */
-	npte = (pa & PG_FRAME);
+	KDASSERT((pa & PG_FRAME) == pa);
+	npte = pa;
 
-	/* VA 0 is magic. */
-	if (pmap != pmap_kernel() && va != 0)
-		npte |= PT_AP(AP_U);
-
+	/*
+	 * VA 0 is magic; that's where the vector page is.  User pmaps
+	 * always need to see an un-cached view of this page; they get
+	 * one anyway, since it's not in the managed page pool, so there
+	 * is no need to check for it here.
+	 */
 	if (pmap_initialized && bank != -1) {
+		KDASSERT(va != 0);
 #ifdef DIAGNOSTIC
 		if ((flags & VM_PROT_ALL) & ~prot)
 			panic("pmap_enter: access_type exceeds prot");
 #endif
-		npte |= pte_cache_mode;
+		/*
+		 * XXXJRT -- consider optimization potential.
+		 * C.f. Alpha pmap.
+		 */
 		if (flags & VM_PROT_WRITE) {
-			npte |= L2_SPAGE | PT_AP(AP_W);
+			npte |= pmap_pte_proto(pmap,
+					       VM_PROT_READ|VM_PROT_WRITE,
+					       PTE_PROTO_CACHE);
 			vm_physmem[bank].pmseg.attrs[off] |= PT_H | PT_M;
 		} else if (flags & VM_PROT_ALL) {
-			npte |= L2_SPAGE;
+			npte |= pmap_pte_proto(pmap,
+					       VM_PROT_READ,
+					       PTE_PROTO_CACHE);
 			vm_physmem[bank].pmseg.attrs[off] |= PT_H;
-		} else
-			npte |= L2_INVAL;
+		}
+		/*
+		 * ...else we want to take a fault, so don't do anything
+		 * to the PTE here.
+		 */
 	} else {
-		if (prot & VM_PROT_WRITE)
-			npte |= L2_SPAGE | PT_AP(AP_W);
-		else if (prot & VM_PROT_ALL)
-			npte |= L2_SPAGE;
-		else
-			npte |= L2_INVAL;
+		/*
+		 * Non-managed pages entered via this interface
+		 * are implicitly un-cached.
+		 */
+		npte |= pmap_pte_proto(pmap, prot, PTE_PROTO_NOCACHE);
 	}
 
 #ifdef MYCROFT_HACK
@@ -2920,7 +2981,14 @@ pmap_kenter_pa(va, pa, prot)
 	}
 	pte = vtopte(va);
 	KASSERT(!pmap_pte_v(pte));
-	*pte = L2_PTE(pa, AP_KRW);
+#if 1 /* XXX */
+	*pte = pa | pmap_pte_proto(pmap_kernel(),
+				   VM_PROT_READ|VM_PROT_WRITE,
+				   PTE_PROTO_CACHE);
+#else
+	*pte = pa | pmap_pte_proto(pmap_kernel(), prot,
+				   PTE_PROTO_CACHE);
+#endif
 }
 
 void
@@ -3357,7 +3425,7 @@ pmap_clearbit(pa, maskbits)
 		pte = pmap_pte(pv->pv_pmap, va);
 		KASSERT(pte != NULL);
 		if (maskbits & (PT_Wr|PT_M)) {
-			if ((pv->pv_flags & PT_NC)) {
+			if (pv->pv_flags & PT_NC) {
 				/* 
 				 * Entry is not cacheable: reenable
 				 * the cache, nothing to flush
@@ -3375,32 +3443,52 @@ pmap_clearbit(pa, maskbits)
 				 *
 				 */
 				if (maskbits & PT_Wr) {
-					*pte |= pte_cache_mode;
+					/*
+					 * Clear the NC bit in the pv
+					 * entry; we'll update the PTE
+					 * below.
+					 */
 					pv->pv_flags &= ~PT_NC;
 				}
-			} else if (pmap_is_curpmap(pv->pv_pmap))
-				/* 
+			} else if (pmap_is_curpmap(pv->pv_pmap)) {
+				/*
 				 * Entry is cacheable: check if pmap is
-				 * current if it is flush it,
-				 * otherwise it won't be in the cache
+				 * current, and if it is, flush it,
+				 * otherwise it won't be in the cache.
 				 */
 				cpu_idcache_wbinv_range(pv->pv_va, NBPG);
+			}
 
-			/* make the pte read only */
-			*pte &= ~PT_AP(AP_W);
+			/* Make the PTE read-only. */
+			*pte = (*pte & PG_FRAME) |
+			    pmap_pte_proto(pv->pv_pmap, VM_PROT_READ,
+					   (pv->pv_flags & PT_NC) ?
+					   PTE_PROTO_NOCACHE :
+					   PTE_PROTO_CACHE);
 		}
+
+		if (maskbits & PT_H) {
+			/*
+			 * We are going to revoke the mapping for this
+			 * page.  If it is writable, make sure to flush
+			 * it from the cache.
+			 *
+			 * XXXJRT This flush might be redundant!
+			 */
+			if ((pv->pv_flags & PT_Wr) != 0 &&
+			    pmap_is_curpmap(pv->pv_pmap))
+				cpu_idcache_wbinv_range(pv->pv_va, NBPG);
 
-		if (maskbits & PT_H)
-			*pte = (*pte & ~L2_MASK) | L2_INVAL;
+			*pte = *pte & PG_FRAME;
+		}
 
-		if (pmap_is_curpmap(pv->pv_pmap))
+		if (pmap_is_curpmap(pv->pv_pmap)) {
 			/* 
-			 * if we had cacheable pte's we'd clean the
-			 * pte out to memory here
-			 *
-			 * flush tlb entry as it's in the current pmap
+			 * The PTE has been modified, and it's in the
+			 * current pmap; invalidate the TLB entry.
 			 */
 			cpu_tlb_flushID_SE(pv->pv_va); 
+		}
 	}
 	cpu_cpwait();
 
@@ -3499,6 +3587,7 @@ pmap_modified_emulation(pmap, va)
 		return(0);
 
 	/* This can happen if user code tries to access kernel memory. */
+	/* XXXJRT Use address-based check.  C.f. Alpha pmap. */
 	if ((*pte & PT_AP(AP_W)) != 0)
 		return (0);
 
@@ -3540,7 +3629,10 @@ pmap_modified_emulation(pmap, va)
 	 * already set the cacheable bits based on the assumption that we
 	 * can write to this page.
 	 */
-	*pte = (*pte & ~L2_MASK) | L2_SPAGE | PT_AP(AP_W);
+	*pte = (*pte & PG_FRAME) |
+	    pmap_pte_proto(pmap, VM_PROT_READ|VM_PROT_WRITE,
+			   (flags & PT_NC) ? PTE_PROTO_NOCACHE
+					   : PTE_PROTO_CACHE);
 	PDEBUG(0, printf("->(%08x)\n", *pte));
 
 	simple_unlock(&pvh->pvh_lock);
@@ -3558,8 +3650,9 @@ pmap_handled_emulation(pmap, va)
 	vaddr_t va;
 {
 	pt_entry_t *pte;
+	struct pv_head *pvh;
 	paddr_t pa;
-	int bank, off;
+	int bank, off, flags;
 
 	PDEBUG(2, printf("pmap_handled_emulation\n"));
 
@@ -3585,15 +3678,35 @@ pmap_handled_emulation(pmap, va)
 	if ((bank = vm_physseg_find(atop(pa), &off)) == -1)
 		return(0);
 
+	PMAP_HEAD_TO_MAP_LOCK();
+	/* Get the current flags for this page. */
+	pvh = &vm_physmem[bank].pmseg.pvhead[off];
+	/* XXX: needed if we hold head->map lock? */
+	simple_lock(&pvh->pvh_lock);
+
+	/*
+	 * XXXJRT Get the cacheable/non-cacheable state for this
+	 * XXXJRT mapping.  This should die, in favor of stuffing
+	 * XXXJRT these bits into the vm_page.
+	 */
+	flags = pmap_modify_pv(pmap, va, pvh, 0, 0);
+
 	/*
-	 * Ok we just enable the pte and mark the attibs as handled
+	 * Ok, we just enable the PTE and mark the attribs as handled
 	 */
 	PDEBUG(0, printf("pmap_handled_emulation: Got a hit va=%08lx pte = %p (%08x)\n",
 	    va, pte, *pte));
 	vm_physmem[bank].pmseg.attrs[off] |= PT_H;
-	*pte = (*pte & ~L2_MASK) | L2_SPAGE;
+	*pte = (*pte & PG_FRAME) | pmap_pte_proto(pmap,
+						  VM_PROT_READ,
+						  (flags & PT_NC) ?
+						  PTE_PROTO_NOCACHE :
+						  PTE_PROTO_CACHE);
 	PDEBUG(0, printf("->(%08x)\n", *pte));
 
+	simple_unlock(&pvh->pvh_lock);
+	PMAP_HEAD_TO_MAP_UNLOCK();
+
 	/* Return, indicating the problem has been dealt with */
 	cpu_tlb_flushID_SE(va);
 	cpu_cpwait();
@@ -3719,5 +3832,411 @@ pmap_alloc_ptp(struct pmap *pmap, vaddr_
 //	pmap->pm_ptphint = ptp;
 	return (ptp);
 }
+
+/************************ Bootstrapping routines ****************************/
+
+/*
+ * pmap_map_section:
+ *
+ *	Create a single section mapping.
+ */
+void
+pmap_map_section(vaddr_t l1pt, vaddr_t va, paddr_t pa, int prot, int cache)
+{
+	pd_entry_t *pde = (pd_entry_t *) l1pt;
+
+	KASSERT(((va | pa) & (L1_SEC_SIZE - 1)) == 0);
+
+	pde[va >> PDSHIFT] = pa | l1sec_proto(prot, cache);
+}
+
+/*
+ * pmap_map_entry:
+ *
+ *	Create a single page mapping.
+ */
+void
+pmap_map_entry(vaddr_t l2pt, vaddr_t va, paddr_t pa, int prot, int cache)
+{
+	pt_entry_t *pte = (pt_entry_t *) l2pt;
+
+#ifndef cats
+	pte[(va >> PGSHIFT) & 0x3ff] =
+	    pa | pte_proto(PTE_PROTO_KERNEL, prot, cache);
+#else
+	pte[(va >> PGSHIFT) & 0x7ff] =
+	    pa | pte_proto(PTE_PROTO_KERNEL, prot, cache);
+#endif /* cats */
+}
+
+/*
+ * pmap_map_l2pt:
+ *
+ *	Map L2 page table at the specified physical address
+ *	into the slot for the specified virtual address in
+ *	the L1 table.
+ */
+void
+pmap_map_l2pt(vaddr_t l1pt, vaddr_t va, paddr_t pa)
+{
+	pd_entry_t *pde = (pd_entry_t *) l1pt;
+
+	KASSERT((pa & PG_FRAME) == pa);
+
+	pde[(va >> PDSHIFT) + 0] = (pa + 0x000) | pde_proto;
+	pde[(va >> PDSHIFT) + 1] = (pa + 0x400) | pde_proto;
+	pde[(va >> PDSHIFT) + 2] = (pa + 0x800) | pde_proto;
+	pde[(va >> PDSHIFT) + 3] = (pa + 0xc00) | pde_proto;
+}
+
+/*
+ * pmap_map_chunk:
+ *
+ *	Map a chunk of memory using the most efficient mappings
+ *	possible (section, large page, small page) into the
+ *	provided L1 and L2 tables at the specified virtual address.
+ */
+vsize_t
+pmap_map_chunk(vaddr_t l1pt, vaddr_t l2pt, vaddr_t va, paddr_t pa,
+    vsize_t size, int prot, int cache)
+{
+	pd_entry_t *pde = (pd_entry_t *) l1pt;
+	pt_entry_t *pte = (pt_entry_t *) l2pt;
+	vsize_t resid;
+	int i;
+
+	resid = (size + (NBPG - 1)) & ~(NBPG - 1);
+
+#ifdef VERBOSE_INIT_ARM
+	printf("pmap_map_chunk: pa=0x%lx va=0x%lx size=0x%lx resid=0x%lx "
+	    "prot=0x%x cache=%d\n", pa, va, size, resid, prot, cache);
+#endif
+
+	size = resid;
+
+	while (resid > 0) {
+		/* See if we can use a section mapping. */
+		if (l1pt &&
+		    ((pa | va) & (L1_SEC_SIZE - 1)) == 0 &&
+		    resid >= L1_SEC_SIZE) {
+#ifdef VERBOSE_INIT_ARM
+			printf("S");
+#endif
+			pde[va >> PDSHIFT] = pa | l1sec_proto(prot, cache);
+			va += L1_SEC_SIZE;
+			pa += L1_SEC_SIZE;
+			resid -= L1_SEC_SIZE;
+			continue;
+		}
+
+		/* See if we can use a L2 large page mapping. */
+		if (((pa | va) & (L2_LPAGE_SIZE - 1)) == 0 &&
+		    resid >= L2_LPAGE_SIZE) {
+#ifdef VERBOSE_INIT_ARM
+			printf("L");
+#endif
+			for (i = 0; i < 16; i++) {
+#ifndef cats /* XXXJRT */
+				pte[((va >> PGSHIFT) & 0x3f0) + i] = pa |
+				    lpte_proto(prot, cache);
+#else
+				pte[((va >> PGSHIFT) & 0x7f0) + i] = pa |
+				    lpte_proto(prot, cache);
+#endif /* cats */
+			}
+			va += L2_LPAGE_SIZE;
+			pa += L2_LPAGE_SIZE;
+			resid -= L2_LPAGE_SIZE;
+			continue;
+		}
+
+		/* Use a small page mapping. */
+#ifdef VERBOSE_INIT_ARM
+		printf("P");
+#endif
+#ifndef cats /* XXXJRT */
+		pte[(va >> PGSHIFT) & 0x3ff] = pa |
+		    pte_proto(PTE_PROTO_KERNEL, prot, cache);
+#else
+		pte[(va >> PGSHIFT) & 0x7ff] = pa |
+		    pte_proto(PTE_PROTO_KERNEL, prot, cache);
+#endif /* cats */
+		va += NBPG;
+		pa += NBPG;
+		resid -= NBPG;
+	}
+#ifdef VERBOSE_INIT_ARM
+	printf("\n");
+#endif
+	return (size);
+}
+
+/*
+ * pmap_pte_protos_init:
+ *
+ *	Initialize the prototype PTE arrays.  This is done very
+ *	early, right after the cpufunc vector is selected.
+ */
+#if defined(CPU_ARM6) || defined(CPU_ARM7) || defined(CPU_ARM7TDMI) || \
+    defined(CPU_ARM8) || defined(CPU_SA110)
+void
+pmap_pte_protos_init_arm678(void)
+{
+	int prot;
+
+	/*
+	 * NOTE: For all ARM6, ARM7, and ARM8 CPUs, bit 4 (the
+	 * implementation defined bit) of L1 descriptors should
+	 * be set to 1.
+	 */
+
+	pde_proto = L1_PAGE | PT_U;
+
+#define	CACHE	(PT_B|PT_C)
+
+	for (prot = 0; prot < 8; prot++) {
+		if (prot & VM_PROT_WRITE) {
+			pte_proto(PTE_PROTO_KERNEL, prot,
+				  PTE_PROTO_CACHE) =
+			    L2_SPAGE | PT_AP(AP_KRW) | CACHE;
+
+			pte_proto(PTE_PROTO_KERNEL, prot,
+				  PTE_PROTO_NOCACHE) =
+			    L2_SPAGE | PT_AP(AP_KRW);
+
+			pte_proto(PTE_PROTO_USER, prot,
+				  PTE_PROTO_CACHE) =
+			    L2_SPAGE | PT_AP(AP_KRWURW) | CACHE;
+
+			pte_proto(PTE_PROTO_USER, prot,
+				  PTE_PROTO_NOCACHE) =
+			    L2_SPAGE | PT_AP(AP_KRWURW);
+
+			l1sec_proto(prot, PTE_PROTO_CACHE) =
+			    (AP_KRW << AP_SECTION_SHIFT) |
+			    L1_SECTION | PT_U | CACHE;
+
+			l1sec_proto(prot, PTE_PROTO_NOCACHE) =
+			    (AP_KRW << AP_SECTION_SHIFT) |
+			    L1_SECTION | PT_U;
+
+			lpte_proto(prot, PTE_PROTO_CACHE) =
+			    L2_LPAGE | PT_AP(AP_KRW) | CACHE;
+
+			lpte_proto(prot, PTE_PROTO_NOCACHE) =
+			    L2_LPAGE | PT_AP(AP_KRW);
+		} else if (prot & VM_PROT_ALL) {
+			pte_proto(PTE_PROTO_KERNEL, prot,
+				  PTE_PROTO_CACHE) =
+			    L2_SPAGE | PT_AP(AP_KR) | CACHE;
+
+			pte_proto(PTE_PROTO_KERNEL, prot,
+				  PTE_PROTO_NOCACHE) =
+			    L2_SPAGE | PT_AP(AP_KR);
+
+			pte_proto(PTE_PROTO_USER, prot,
+				  PTE_PROTO_CACHE) =
+			    L2_SPAGE | PT_AP(AP_KRWUR) | CACHE;
+
+			pte_proto(PTE_PROTO_USER, prot,
+				  PTE_PROTO_NOCACHE) =
+			    L2_SPAGE | PT_AP(AP_KRWUR);
+
+			l1sec_proto(prot, PTE_PROTO_CACHE) =
+			    (AP_KR << AP_SECTION_SHIFT) |
+			    L1_SECTION | PT_U | CACHE;
+
+			l1sec_proto(prot, PTE_PROTO_NOCACHE) =
+			    (AP_KR << AP_SECTION_SHIFT) |
+			    L1_SECTION | PT_U;
+
+			lpte_proto(prot, PTE_PROTO_CACHE) =
+			    L2_LPAGE | PT_AP(AP_KR) | CACHE;
+
+			lpte_proto(prot, PTE_PROTO_NOCACHE) =
+			    L2_LPAGE | PT_AP(AP_KR);
+		}
+	}
+#undef CACHE
+}
+#endif /* CPU_ARM6 || CPU_ARM7 || CPU_ARM7TDMI || CPU_ARM8 || CPU_SA110 */
+
+#if defined(CPU_ARM9)
+void
+pmap_pte_protos_init_arm9(void)
+{
+	int prot;
+
+	/*
+	 * NOTE: For all ARM9 CPUs, bit 4 (the implementation defined
+	 * bit) of L1 descriptors should be set to 1.
+	 */
+
+	pde_proto = L1_PAGE | PT_U;
+
+/* Use the cache in write-through mode for now. */
+#define	CACHE	(PT_C)
+
+	for (prot = 0; prot < 8; prot++) {
+		if (prot & VM_PROT_WRITE) {
+			pte_proto(PTE_PROTO_KERNEL, prot,
+				  PTE_PROTO_CACHE) =
+			    L2_SPAGE | PT_AP(AP_KRW) | CACHE;
+
+			pte_proto(PTE_PROTO_KERNEL, prot,
+				  PTE_PROTO_NOCACHE) =
+			    L2_SPAGE | PT_AP(AP_KRW);
+
+			pte_proto(PTE_PROTO_USER, prot,
+				  PTE_PROTO_CACHE) =
+			    L2_SPAGE | PT_AP(AP_KRWURW) | CACHE;
+
+			pte_proto(PTE_PROTO_USER, prot,
+				  PTE_PROTO_NOCACHE) =
+			    L2_SPAGE | PT_AP(AP_KRWURW);
+
+			l1sec_proto(prot, PTE_PROTO_CACHE) =
+			    (AP_KRW << AP_SECTION_SHIFT) |
+			    L1_SECTION | PT_U | CACHE;
+
+			l1sec_proto(prot, PTE_PROTO_NOCACHE) =
+			    (AP_KRW << AP_SECTION_SHIFT) |
+			    L1_SECTION | PT_U;
+
+			lpte_proto(prot, PTE_PROTO_CACHE) =
+			    L2_LPAGE | PT_AP(AP_KRW) | CACHE;
+
+			lpte_proto(prot, PTE_PROTO_NOCACHE) =
+			    L2_LPAGE | PT_AP(AP_KRW);
+		} else if (prot & VM_PROT_ALL) {
+			pte_proto(PTE_PROTO_KERNEL, prot,
+				  PTE_PROTO_CACHE) =
+			    L2_SPAGE | PT_AP(AP_KR) | CACHE;
+
+			pte_proto(PTE_PROTO_KERNEL, prot,
+				  PTE_PROTO_NOCACHE) =
+			    L2_SPAGE | PT_AP(AP_KR);
+
+			pte_proto(PTE_PROTO_USER, prot,
+				  PTE_PROTO_CACHE) =
+			    L2_SPAGE | PT_AP(AP_KRWUR) | CACHE;
+
+			pte_proto(PTE_PROTO_USER, prot,
+				  PTE_PROTO_NOCACHE) =
+			    L2_SPAGE | PT_AP(AP_KRWUR);
+
+			l1sec_proto(prot, PTE_PROTO_CACHE) =
+			    (AP_KR << AP_SECTION_SHIFT) |
+			    L1_SECTION | PT_U | CACHE;
+
+			l1sec_proto(prot, PTE_PROTO_NOCACHE) =
+			    (AP_KR << AP_SECTION_SHIFT) |
+			    L1_SECTION | PT_U;
+
+			lpte_proto(prot, PTE_PROTO_CACHE) =
+			    L2_LPAGE | PT_AP(AP_KR) | CACHE;
+
+			lpte_proto(prot, PTE_PROTO_NOCACHE) =
+			    L2_LPAGE | PT_AP(AP_KR);
+		}
+	}
+#undef CACHE
+}
+#endif /* CPU_ARM9 */
+
+#if defined(CPU_XSCALE)
+void
+pmap_pte_protos_init_xscale(void)
+{
+	int prot;
+
+/*
+ * i80200 errata item #40: Store to cacheable memory,
+ * interrupted by an exception, may inadvertently
+ * write to memory.
+ *
+ * This can have an adverse effect on copy-on-write
+ * operation.
+ *
+ * Work-around: Non-writable mappings should have
+ * a cache mode of write-through (this avoids the
+ * problem).  This has no adverse performance effect,
+ * since the mappings are read-only.
+ */
+#define	CACHE_WT	(PT_C)
+#define	CACHE_WB	(PT_C)		/* XXX for now */
+
+	/*
+	 * NOTE: For all XScale CPUs, bit 4 (the implementation defined
+	 * bit) of L1 descriptors should be set to 0.
+	 */
+
+	pde_proto = L1_PAGE;
+
+	for (prot = 0; prot < 8; prot++) {
+		if (prot & VM_PROT_WRITE) {
+			pte_proto(PTE_PROTO_KERNEL, prot,
+				  PTE_PROTO_CACHE) =
+			    L2_SPAGE | PT_AP(AP_KRW) | CACHE_WB;
+
+			pte_proto(PTE_PROTO_KERNEL, prot,
+				  PTE_PROTO_NOCACHE) =
+			    L2_SPAGE | PT_AP(AP_KRW);
+
+			pte_proto(PTE_PROTO_USER, prot,
+				  PTE_PROTO_CACHE) =
+			    L2_SPAGE | PT_AP(AP_KRWURW) | CACHE_WB;
+
+			pte_proto(PTE_PROTO_USER, prot,
+				  PTE_PROTO_NOCACHE) =
+			    L2_SPAGE | PT_AP(AP_KRWURW);
+
+			l1sec_proto(prot, PTE_PROTO_CACHE) =
+			    (AP_KRW << AP_SECTION_SHIFT) |
+			    L1_SECTION | CACHE_WB;
+
+			l1sec_proto(prot, PTE_PROTO_NOCACHE) =
+			    (AP_KRW << AP_SECTION_SHIFT) |
+			    L1_SECTION;
 
-/* End of pmap.c */
+			lpte_proto(prot, PTE_PROTO_CACHE) =
+			    L2_LPAGE | PT_AP(AP_KRW) | CACHE_WB;
+
+			lpte_proto(prot, PTE_PROTO_NOCACHE) =
+			    L2_LPAGE | PT_AP(AP_KRW);
+		} else if (prot & VM_PROT_ALL) {
+			pte_proto(PTE_PROTO_KERNEL, prot,
+				  PTE_PROTO_CACHE) =
+			    L2_SPAGE | PT_AP(AP_KR) | CACHE_WT;
+
+			pte_proto(PTE_PROTO_KERNEL, prot,
+				  PTE_PROTO_NOCACHE) =
+			    L2_SPAGE | PT_AP(AP_KR);
+
+			pte_proto(PTE_PROTO_USER, prot,
+				  PTE_PROTO_CACHE) =
+			    L2_SPAGE | PT_AP(AP_KRWUR) | CACHE_WT;
+
+			pte_proto(PTE_PROTO_USER, prot,
+				  PTE_PROTO_NOCACHE) =
+			    L2_SPAGE | PT_AP(AP_KRWUR);
+
+			l1sec_proto(prot, PTE_PROTO_CACHE) =
+			    (AP_KRW << AP_SECTION_SHIFT) |
+			    L1_SECTION | CACHE_WT;
+
+			l1sec_proto(prot, PTE_PROTO_NOCACHE) =
+			    (AP_KRW << AP_SECTION_SHIFT) |
+			    L1_SECTION;
+
+			lpte_proto(prot, PTE_PROTO_CACHE) =
+			    L2_LPAGE | PT_AP(AP_KR) | CACHE_WT;
+
+			lpte_proto(prot, PTE_PROTO_NOCACHE) =
+			    L2_LPAGE | PT_AP(AP_KR);
+		}
+	}
+#undef CACHE_WT
+#undef CACHE_WB
+}
+#endif /* CPU_XSCALE */
Index: arm/include/arm32/machdep.h
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/arm/include/arm32/machdep.h,v
retrieving revision 1.3
diff -u -p -r1.3 machdep.h
--- arm/include/arm32/machdep.h	2002/01/20 03:41:48	1.3
+++ arm/include/arm32/machdep.h	2002/01/31 19:25:31
@@ -11,14 +11,6 @@ void prefetch_abort_handler __P((trapfra
 void undefinedinstruction_bounce __P((trapframe_t *));
 void dumpsys	__P((void));
 
-void	map_section(vaddr_t, vaddr_t, paddr_t, int);
-void	map_pagetable(vaddr_t, vaddr_t, paddr_t);
-void	map_entry(vaddr_t, vaddr_t, paddr_t);
-void	map_entry_nc(vaddr_t, vaddr_t, paddr_t);
-void	map_entry_ro(vaddr_t, vaddr_t, paddr_t); 
-vsize_t map_chunk(vaddr_t, vaddr_t, vaddr_t, paddr_t, vsize_t,
-	    u_int, u_int);
-
 /* 
  * note that we use void * as all the platforms have different ideas on what
  * the structure is
Index: arm/include/arm32/pmap.h
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/arm/include/arm32/pmap.h,v
retrieving revision 1.20
diff -u -p -r1.20 pmap.h
--- arm/include/arm32/pmap.h	2002/01/19 16:55:22	1.20
+++ arm/include/arm32/pmap.h	2002/01/31 19:25:31
@@ -138,11 +138,36 @@ typedef struct {
 } pv_addr_t;
 
 /*
- * _KERNEL specific macros, functions and prototypes
+ * Prototype PTE bits for each VM protection code, both cached
+ * and un-cached, kernel and userland.
  */
+extern pt_entry_t pte_protos[4][8];
+extern pd_entry_t l1sec_protos[2][8];
+extern pt_entry_t lpte_protos[2][8];
+extern pd_entry_t pde_proto;
+
+#define	PTE_PROTO_KERNEL	0
+#define	PTE_PROTO_USER		1
+#define	PTE_PROTO_NOCACHE	0
+#define	PTE_PROTO_CACHE		1
+
+#define	pte_proto(ku, prot, cache)					\
+	pte_protos[(ku) + ((cache) << 1)][(prot)]
+
+#define	l1sec_proto(prot, cache)					\
+	l1sec_protos[(cache)][(prot)]
+
+#define	lpte_proto(prot, cache)						\
+	lpte_protos[(cache)][(prot)]
+
+#define	pmap_pte_proto(pm, prot, cache)					\
+	pte_proto((pm == pmap_kernel()) ? PTE_PROTO_KERNEL		\
+					: PTE_PROTO_USER, (prot), (cache))
+
+void	pmap_pte_protos_init_arm678(void);
+void	pmap_pte_protos_init_arm9(void);
+void	pmap_pte_protos_init_xscale(void);
 
-#ifdef  _KERNEL
-
 /*
  * Commonly referenced structures
  */
@@ -176,13 +201,17 @@ int pmap_modified_emulation __P((struct 
 void pmap_postinit __P((void));
 pt_entry_t *pmap_pte __P((struct pmap *, vaddr_t));
 
+/* Bootstrapping routines. */
+void	pmap_map_section(vaddr_t, vaddr_t, paddr_t, int, int);
+void	pmap_map_entry(vaddr_t, vaddr_t, paddr_t, int, int);
+void	pmap_map_l2pt(vaddr_t, vaddr_t, paddr_t);
+vsize_t	pmap_map_chunk(vaddr_t, vaddr_t, vaddr_t, paddr_t, vsize_t, int, int);
+
 /*
  * Special page zero routine for use by the idle loop (no cache cleans). 
  */
 boolean_t	pmap_pageidlezero __P((paddr_t));
 #define PMAP_PAGEIDLEZERO(pa)	pmap_pageidlezero((pa))
-
-#endif	/* _KERNEL */
 
 /*
  * Useful macros and constants 
Index: arm/include/arm32/pte.h
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/arm/include/arm32/pte.h,v
retrieving revision 1.1
diff -u -p -r1.1 pte.h
--- arm/include/arm32/pte.h	2001/11/23 17:39:04	1.1
+++ arm/include/arm32/pte.h	2002/01/31 19:25:31
@@ -74,12 +74,6 @@ typedef	int	pt_entry_t;		/* page table e
 #define PT_C		0x08	/* Phys - Cacheable */
 #define PT_U		0x10	/* Phys - Updateable */
 
-#ifndef _LOCORE
-extern pt_entry_t	pte_cache_mode;
-
-#define PT_CACHEABLE	(pte_cache_mode)
-#endif
-
 /* Page R/M attributes (in pmseg.attrs). */
 #define PT_M		0x01	/* Virt - Modified */
 #define PT_H		0x02	/* Virt - Handled (Used) */
@@ -103,18 +97,6 @@ extern pt_entry_t	pte_cache_mode;
 #define L2_SPAGE	0x02	/* L2 small page (4KB) */
 #define L2_MASK		0x03	/* Mask for L2 entry type */
 #define L2_INVAL	0x00	/* L2 invalid type */
-
-/* PTE construction macros */
-#define	L2_LPTE(p, a, f)	((p) | PT_AP(a) | L2_LPAGE | (f))
-#define L2_SPTE(p, a, f)	((p) | PT_AP(a) | L2_SPAGE | (f))
-#define L2_PTE(p, a)		L2_SPTE((p), (a), PT_CACHEABLE)
-#define L2_PTE_NC(p, a)		L2_SPTE((p), (a), PT_B)
-#define L2_PTE_NC_NB(p, a)	L2_SPTE((p), (a), 0)
-#define L1_SECPTE(p, a, f)	((p) | ((a) << AP_SECTION_SHIFT) | (f) \
-				| L1_SECTION | PT_U)
-
-#define L1_PTE(p)	((p) | 0x00 | L1_PAGE | PT_U)
-#define L1_SEC(p, c)	L1_SECPTE((p), AP_KRW, (c))
 
 #define L1_SEC_SIZE	(1 << PDSHIFT)
 #define L2_LPAGE_SIZE	(NBPG * 16)
Index: arm/mainbus/mainbus_io.c
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/arm/mainbus/mainbus_io.c,v
retrieving revision 1.6
diff -u -p -r1.6 mainbus_io.c
--- arm/mainbus/mainbus_io.c	2001/11/23 17:23:42	1.6
+++ arm/mainbus/mainbus_io.c	2002/01/31 19:25:32
@@ -163,11 +163,13 @@ mainbus_bs_map(t, bpa, size, cacheable, 
 
 	for(pa = startpa; pa < endpa; pa += PAGE_SIZE, va += PAGE_SIZE) {
 		pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE);
-		pte = pmap_pte(pmap_kernel(), va);
-		if (cacheable)
-			*pte |= PT_CACHEABLE;
-		else
-			*pte &= ~PT_CACHEABLE;
+		if (cacheable == 0) {
+			pte = pmap_pte(pmap_kernel(), va);
+			*pte = (*pte & PG_FRAME) |
+			    pmap_pte_proto(pmap_kernel(),
+					   VM_PROT_READ|VM_PROT_WRITE,
+					   PTE_PROTO_NOCACHE);
+		}
 	}
 	pmap_update(pmap_kernel());
 
Index: evbarm/iq80310/iq80310_machdep.c
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/evbarm/iq80310/iq80310_machdep.c,v
retrieving revision 1.18
diff -u -p -r1.18 iq80310_machdep.c
--- evbarm/iq80310/iq80310_machdep.c	2002/01/30 04:01:36	1.18
+++ evbarm/iq80310/iq80310_machdep.c	2002/01/31 19:25:36
@@ -271,7 +271,8 @@ struct l1_sec_map {
 	vaddr_t	va;
 	vaddr_t	pa;
 	vsize_t	size;
-	int flags;
+	int prot;
+	int cache;
 } l1_sec_table[] = {
     /*
      * Map the on-board devices VA == PA so that we can access them
@@ -281,7 +282,8 @@ struct l1_sec_map {
 	IQ80310_OBIO_BASE,
 	IQ80310_OBIO_BASE,
 	IQ80310_OBIO_SIZE,
-	0,
+	VM_PROT_READ|VM_PROT_WRITE,
+	PTE_PROTO_NOCACHE,
     },
 
     {
@@ -576,16 +578,16 @@ initarm(void *arg)
 	l1pagetable = kernel_l1pt.pv_pa;
 
 	/* Map the L2 pages tables in the L1 page table */
-	map_pagetable(l1pagetable, 0x00000000,
+	pmap_map_l2pt(l1pagetable, 0x00000000,
 	    kernel_pt_table[KERNEL_PT_SYS]);
-	map_pagetable(l1pagetable, KERNEL_BASE,
+	pmap_map_l2pt(l1pagetable, KERNEL_BASE,
 	    kernel_pt_table[KERNEL_PT_KERNEL]);
-	map_pagetable(l1pagetable, IQ80310_IOPXS_VBASE,
+	pmap_map_l2pt(l1pagetable, IQ80310_IOPXS_VBASE,
 	    kernel_pt_table[KERNEL_PT_IOPXS]);
 	for (loop = 0; loop < KERNEL_PT_VMDATA_NUM; ++loop)
-		map_pagetable(l1pagetable, KERNEL_VM_BASE + loop * 0x00400000,
+		pmap_map_l2pt(l1pagetable, KERNEL_VM_BASE + loop * 0x00400000,
 		    kernel_pt_table[KERNEL_PT_VMDATA + loop]);
-	map_pagetable(l1pagetable, PROCESS_PAGE_TBLS_BASE,
+	pmap_map_l2pt(l1pagetable, PROCESS_PAGE_TBLS_BASE,
 	    kernel_ptpt.pv_pa);
 
 #ifdef VERBOSE_INIT_ARM
@@ -609,21 +611,21 @@ initarm(void *arg)
 		/*
 		 * This maps the kernel text/data/bss VA==PA.
 		 */
-		logical += map_chunk(l1pagetable, l2pagetable,
+		logical += pmap_map_chunk(l1pagetable, l2pagetable,
 		    KERNEL_BASE + logical,
 		    physical_start + logical, textsize,
-		    AP_KRW, PT_CACHEABLE);
-		logical += map_chunk(l1pagetable, l2pagetable,
+		    VM_PROT_READ|VM_PROT_WRITE, PTE_PROTO_CACHE);
+		logical += pmap_map_chunk(l1pagetable, l2pagetable,
 		    KERNEL_BASE + logical,
 		    physical_start + logical, totalsize - textsize,
-		    AP_KRW, PT_CACHEABLE);
+		    VM_PROT_READ|VM_PROT_WRITE, PTE_PROTO_CACHE);
 
 #if 0 /* XXX No symbols yet. */
-		logical += map_chunk(l1pagetable, l2pagetable,
+		logical += pmap_map_chunk(l1pagetable, l2pagetable,
 		    KERNEL_BASE + logical,
 		    physical_start + logical, kernexec->a_syms + sizeof(int)
 		    + *(u_int *)((int)end + kernexec->a_syms + sizeof(int)),
-		    AP_KRW, PT_CACHEABLE);
+		    VM_PROT_READ|VM_PROT_WRITE, PTE_PROTO_CACHE);
 #endif
 	}
 
@@ -632,23 +634,29 @@ initarm(void *arg)
 #endif
 
 	/* Map the stack pages */
-	map_chunk(0, l2pagetable, irqstack.pv_va, irqstack.pv_pa,
-	    IRQ_STACK_SIZE * NBPG, AP_KRW, PT_CACHEABLE);
-	map_chunk(0, l2pagetable, abtstack.pv_va, abtstack.pv_pa,
-	    ABT_STACK_SIZE * NBPG, AP_KRW, PT_CACHEABLE);
-	map_chunk(0, l2pagetable, undstack.pv_va, undstack.pv_pa,
-	    UND_STACK_SIZE * NBPG, AP_KRW, PT_CACHEABLE);
-	map_chunk(0, l2pagetable, kernelstack.pv_va, kernelstack.pv_pa,
-	    UPAGES * NBPG, AP_KRW, PT_CACHEABLE);
-	map_chunk(0, l2pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa,
-	    PD_SIZE, AP_KRW, 0);
+	pmap_map_chunk(0, l2pagetable, irqstack.pv_va, irqstack.pv_pa,
+	    IRQ_STACK_SIZE * NBPG, VM_PROT_READ|VM_PROT_WRITE,
+	    PTE_PROTO_CACHE);
+	pmap_map_chunk(0, l2pagetable, abtstack.pv_va, abtstack.pv_pa,
+	    ABT_STACK_SIZE * NBPG, VM_PROT_READ|VM_PROT_WRITE,
+	    PTE_PROTO_CACHE);
+	pmap_map_chunk(0, l2pagetable, undstack.pv_va, undstack.pv_pa,
+	    UND_STACK_SIZE * NBPG, VM_PROT_READ|VM_PROT_WRITE,
+	    PTE_PROTO_CACHE);
+	pmap_map_chunk(0, l2pagetable, kernelstack.pv_va, kernelstack.pv_pa,
+	    UPAGES * NBPG, VM_PROT_READ|VM_PROT_WRITE,
+	    PTE_PROTO_CACHE);
+	pmap_map_chunk(0, l2pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa,
+	    PD_SIZE, VM_PROT_READ|VM_PROT_WRITE,
+	    PTE_PROTO_NOCACHE);
 
 	/* Map the Mini-Data cache clean area. */
-	map_chunk(0, l2pagetable, minidataclean.pv_va, minidataclean.pv_pa,
-	    NBPG, AP_KRW, PT_CACHEABLE);
+	pmap_map_chunk(0, l2pagetable, minidataclean.pv_va, minidataclean.pv_pa,
+	    NBPG, VM_PROT_READ|VM_PROT_WRITE, PTE_PROTO_CACHE);
 
 	/* Map the page table that maps the kernel pages */
-	map_entry_nc(l2pagetable, kernel_ptpt.pv_pa, kernel_ptpt.pv_pa);
+	pmap_map_entry(l2pagetable, kernel_ptpt.pv_pa, kernel_ptpt.pv_pa,
+	    VM_PROT_READ|VM_PROT_WRITE, PTE_PROTO_NOCACHE);
 
 	/*
 	 * Map entries in the page table used to map PTE's
@@ -656,23 +664,28 @@ initarm(void *arg)
 	 */
 	/* The -2 is slightly bogus, it should be -log2(sizeof(pt_entry_t)) */
 	l2pagetable = kernel_ptpt.pv_pa;
-	map_entry_nc(l2pagetable, (KERNEL_BASE >> (PGSHIFT-2)),
-	    kernel_pt_table[KERNEL_PT_KERNEL]);
-	map_entry_nc(l2pagetable, (PROCESS_PAGE_TBLS_BASE >> (PGSHIFT-2)),
-	    kernel_ptpt.pv_pa);
-	map_entry_nc(l2pagetable, (0x00000000 >> (PGSHIFT-2)),
-	    kernel_pt_table[KERNEL_PT_SYS]);
+	pmap_map_entry(l2pagetable, (KERNEL_BASE >> (PGSHIFT-2)),
+	    kernel_pt_table[KERNEL_PT_KERNEL], VM_PROT_READ|VM_PROT_WRITE,
+	    PTE_PROTO_NOCACHE);
+	pmap_map_entry(l2pagetable, (PROCESS_PAGE_TBLS_BASE >> (PGSHIFT-2)),
+	    kernel_ptpt.pv_pa, VM_PROT_READ|VM_PROT_WRITE,
+	    PTE_PROTO_NOCACHE);
+	pmap_map_entry(l2pagetable, (0x00000000 >> (PGSHIFT-2)),
+	    kernel_pt_table[KERNEL_PT_SYS], VM_PROT_READ|VM_PROT_WRITE,
+	    PTE_PROTO_NOCACHE);
 	for (loop = 0; loop < KERNEL_PT_VMDATA_NUM; ++loop)
-		map_entry_nc(l2pagetable, ((KERNEL_VM_BASE +
+		pmap_map_entry(l2pagetable, ((KERNEL_VM_BASE +
 		    (loop * 0x00400000)) >> (PGSHIFT-2)),
-		    kernel_pt_table[KERNEL_PT_VMDATA + loop]);
+		    kernel_pt_table[KERNEL_PT_VMDATA + loop],
+		    VM_PROT_READ|VM_PROT_WRITE, PTE_PROTO_NOCACHE);
 
 	/*
 	 * Map the system page in the kernel page table for the bottom 1Meg
 	 * of the virtual memory map.
 	 */
 	l2pagetable = kernel_pt_table[KERNEL_PT_SYS];
-	map_entry(l2pagetable, 0x00000000, systempage.pv_pa);
+	pmap_map_entry(l2pagetable, 0x00000000, systempage.pv_pa,
+	    VM_PROT_READ|VM_PROT_WRITE, PTE_PROTO_CACHE);
 
 	/*
 	 * Map devices we can map w/ section mappings.
@@ -687,9 +700,11 @@ initarm(void *arg)
 		    l1_sec_table[loop].va);
 #endif
 		for (sz = 0; sz < l1_sec_table[loop].size; sz += L1_SEC_SIZE)
-			map_section(l1pagetable, l1_sec_table[loop].va + sz,
+			pmap_map_section(l1pagetable,
+			    l1_sec_table[loop].va + sz,
 			    l1_sec_table[loop].pa + sz,
-			    l1_sec_table[loop].flags);
+			    l1_sec_table[loop].prot,
+			    l1_sec_table[loop].cache);
 		++loop;
 	}
 
@@ -704,8 +719,9 @@ initarm(void *arg)
 	    I80312_PCI_XLATE_PIOW_BASE + I80312_PCI_XLATE_IOSIZE - 1,
 	    IQ80310_PIOW_VBASE);
 #endif
-	map_chunk(0, l2pagetable, IQ80310_PIOW_VBASE,
-	    I80312_PCI_XLATE_PIOW_BASE, I80312_PCI_XLATE_IOSIZE, AP_KRW, 0);
+	pmap_map_chunk(0, l2pagetable, IQ80310_PIOW_VBASE,
+	    I80312_PCI_XLATE_PIOW_BASE, I80312_PCI_XLATE_IOSIZE,
+	    VM_PROT_READ|VM_PROT_WRITE, PTE_PROTO_NOCACHE);
 
 #ifdef VERBOSE_INIT_ARM
 	printf("Mapping SIOW 0x%08lx -> 0x%08lx @ 0x%08lx\n",
@@ -713,8 +729,9 @@ initarm(void *arg)
 	    I80312_PCI_XLATE_SIOW_BASE + I80312_PCI_XLATE_IOSIZE - 1,
 	    IQ80310_SIOW_VBASE);
 #endif
-	map_chunk(0, l2pagetable, IQ80310_SIOW_VBASE,
-	    I80312_PCI_XLATE_SIOW_BASE, I80312_PCI_XLATE_IOSIZE, AP_KRW, 0);
+	pmap_map_chunk(0, l2pagetable, IQ80310_SIOW_VBASE,
+	    I80312_PCI_XLATE_SIOW_BASE, I80312_PCI_XLATE_IOSIZE,
+	    VM_PROT_READ|VM_PROT_WRITE, PTE_PROTO_NOCACHE);
 
 #ifdef VERBOSE_INIT_ARM
 	printf("Mapping 80312 0x%08lx -> 0x%08lx @ 0x%08lx\n",
@@ -722,8 +739,9 @@ initarm(void *arg)
 	    I80312_PMMR_BASE + I80312_PMMR_SIZE - 1,
 	    IQ80310_80312_VBASE);
 #endif
-	map_chunk(0, l2pagetable, IQ80310_80312_VBASE,
-	    I80312_PMMR_BASE, I80312_PMMR_SIZE, AP_KRW, 0);
+	pmap_map_chunk(0, l2pagetable, IQ80310_80312_VBASE,
+	    I80312_PMMR_BASE, I80312_PMMR_SIZE,
+	    VM_PROT_READ|VM_PROT_WRITE, PTE_PROTO_NOCACHE);
 
 	/*
 	 * Give the XScale global cache clean code an appropriately

--WYTEVAkct0FjGQmd--