Subject: new R10000 cache op implementation
To: None <port-mips@netbsd.org>
From: Takao Shinohara <shin@sm.sony.co.jp>
List: port-mips
Date: 10/25/2003 19:34:50
current implementation of R10000 cache op (arch/mips/cache_r10k.c rev. 1.1)
is broken, so I wrote a new implementation independently of Kiyohara-san's.

How about this patch (at the end of this mail)?

--- Takao Shinohara

Index: include/cache_r10k.h
===================================================================
RCS file: /cvsroot/src/sys/arch/mips/include/cache_r10k.h,v
retrieving revision 1.1
diff -u -r1.1 cache_r10k.h
--- include/cache_r10k.h	2003/10/05 11:10:25	1.1
+++ include/cache_r10k.h	2003/10/25 10:01:13
@@ -69,94 +69,19 @@
 
 #if defined(_KERNEL) && !defined(_LOCORE)
 
-/*
- * cache_r10k_op_8lines_64:
- *
- *	Perform the specified cache operation on 8 64-byte cache lines.
- */
-#define	cache_r10k_op_8lines_64(va, op)					\
-do {									\
-	__asm __volatile(						\
-		".set noreorder					\n\t"	\
-		"cache %1, 0x000(%0); cache %1, 0x040(%0)	\n\t"	\
-		"cache %1, 0x080(%0); cache %1, 0x0c0(%0)	\n\t"	\
-		"cache %1, 0x100(%0); cache %1, 0x140(%0)	\n\t"	\
-		"cache %1, 0x180(%0); cache %1, 0x1c0(%0)	\n\t"	\
-		".set reorder"						\
-	    :								\
-	    : "r" (va), "i" (op)					\
-	    : "memory");						\
-} while (/*CONSTCOND*/0)
-
-/*
- * cache_r10k_op_32lines_64:
- *
- *	Perform the specified cache operation on 32 64-byte
- *	cache lines.
- */
-#define	cache_r10k_op_32lines_64(va, op)				\
-do {									\
-	__asm __volatile(						\
-		".set noreorder					\n\t"	\
-		"cache %1, 0x000(%0); cache %1, 0x040(%0);	\n\t"	\
-		"cache %1, 0x080(%0); cache %1, 0x0c0(%0);	\n\t"	\
-		"cache %1, 0x100(%0); cache %1, 0x140(%0);	\n\t"	\
-		"cache %1, 0x180(%0); cache %1, 0x1c0(%0);	\n\t"	\
-		"cache %1, 0x200(%0); cache %1, 0x240(%0);	\n\t"	\
-		"cache %1, 0x280(%0); cache %1, 0x2c0(%0);	\n\t"	\
-		"cache %1, 0x300(%0); cache %1, 0x340(%0);	\n\t"	\
-		"cache %1, 0x380(%0); cache %1, 0x3c0(%0);	\n\t"	\
-		"cache %1, 0x400(%0); cache %1, 0x440(%0);	\n\t"	\
-		"cache %1, 0x480(%0); cache %1, 0x4c0(%0);	\n\t"	\
-		"cache %1, 0x500(%0); cache %1, 0x540(%0);	\n\t"	\
-		"cache %1, 0x580(%0); cache %1, 0x5c0(%0);	\n\t"	\
-		"cache %1, 0x600(%0); cache %1, 0x640(%0);	\n\t"	\
-		"cache %1, 0x680(%0); cache %1, 0x6c0(%0);	\n\t"	\
-		"cache %1, 0x700(%0); cache %1, 0x740(%0);	\n\t"	\
-		"cache %1, 0x780(%0); cache %1, 0x7c0(%0);	\n\t"	\
-		".set reorder"						\
-	    :								\
-	    : "r" (va), "i" (op)					\
-	    : "memory");						\
-} while (/*CONSTCOND*/0)
-
-/*
- * cache_r10k_op_16lines_32_2way:
- *
- *	Perform the specified cache operation on 16 64-byte
- * 	cache lines, 2-ways.
- */
-#define	cache_r10k_op_16lines_64_2way(va1, va2, op)			\
-do {									\
-	__asm __volatile(						\
-		".set noreorder					\n\t"	\
-		"cache %2, 0x000(%0); cache %2, 0x000(%1);	\n\t"	\
-		"cache %2, 0x040(%0); cache %2, 0x040(%1);	\n\t"	\
-		"cache %2, 0x080(%0); cache %2, 0x080(%1);	\n\t"	\
-		"cache %2, 0x0c0(%0); cache %2, 0x0c0(%1);	\n\t"	\
-		"cache %2, 0x100(%0); cache %2, 0x100(%1);	\n\t"	\
-		"cache %2, 0x140(%0); cache %2, 0x140(%1);	\n\t"	\
-		"cache %2, 0x180(%0); cache %2, 0x180(%1);	\n\t"	\
-		"cache %2, 0x1c0(%0); cache %2, 0x1c0(%1);	\n\t"	\
-		"cache %2, 0x200(%0); cache %2, 0x200(%1);	\n\t"	\
-		"cache %2, 0x240(%0); cache %2, 0x240(%1);	\n\t"	\
-		"cache %2, 0x280(%0); cache %2, 0x280(%1);	\n\t"	\
-		"cache %2, 0x2c0(%0); cache %2, 0x2c0(%1);	\n\t"	\
-		"cache %2, 0x300(%0); cache %2, 0x300(%1);	\n\t"	\
-		"cache %2, 0x340(%0); cache %2, 0x340(%1);	\n\t"	\
-		"cache %2, 0x380(%0); cache %2, 0x380(%1);	\n\t"	\
-		"cache %2, 0x3c0(%0); cache %2, 0x3c0(%1);	\n\t"	\
-		".set reorder"						\
-	    :								\
-	    : "r" (va1), "r" (va2), "i" (op)				\
-	    : "memory");						\
-} while (/*CONSTCOND*/0)
-
-void	r10k_icache_sync_all_64(void);
-void	r10k_icache_sync_range_64(vaddr_t, vsize_t);
-void	r10k_icache_sync_range_index_64(vaddr_t, vsize_t);
-
+void	r10k_icache_sync_all(void);
+void	r10k_icache_sync_range(vaddr_t, vsize_t);
+void	r10k_icache_sync_range_index(vaddr_t, vsize_t);
+void	r10k_pdcache_wbinv_all(void);
+void	r10k_pdcache_wbinv_range(vaddr_t, vsize_t);
+void	r10k_pdcache_wbinv_range_index(vaddr_t, vsize_t);
+void	r10k_pdcache_inv_range(vaddr_t, vsize_t);
 void	r10k_pdcache_wb_range(vaddr_t, vsize_t);
+void	r10k_sdcache_wbinv_all(void);
+void	r10k_sdcache_wbinv_range(vaddr_t, vsize_t);
+void	r10k_sdcache_wbinv_range_index(vaddr_t, vsize_t);
+void	r10k_sdcache_inv_range(vaddr_t, vsize_t);
+void	r10k_sdcache_wb_range(vaddr_t, vsize_t);
 
 #endif /* _KERNEL && !_LOCORE */
 
Index: mips/cache.c
===================================================================
RCS file: /cvsroot/src/sys/arch/mips/mips/cache.c,v
retrieving revision 1.22
diff -u -r1.22 cache.c
--- mips/cache.c	2003/10/11 09:09:15	1.22
+++ mips/cache.c	2003/10/25 10:01:14
@@ -609,45 +609,30 @@
 #endif /* MIPS3_5900 */
 #ifdef ENABLE_MIPS4_CACHE_R10K
 	case MIPS_R10000:
-		/* cache spec */
+	case MIPS_R12000:
+	case MIPS_R14000:
 		mips_picache_ways = 2;
 		mips_pdcache_ways = 2;
 		mips_sdcache_ways = 2;
 
 		mips4_get_cache_config(csizebase);
 
-		switch (mips_picache_line_size) {
-		case 64:			/* 64 Byte */
-			mips_cache_ops.mco_icache_sync_all =
-			    r10k_icache_sync_all_64;
-			mips_cache_ops.mco_icache_sync_range =
-			    r10k_icache_sync_range_64;
-			mips_cache_ops.mco_icache_sync_range_index =
-			    r10k_icache_sync_range_index_64;
-			break;
-
-		default:
-			panic("r10k picache line size %d",
-			    mips_picache_line_size);
-		}
-		switch (mips_pdcache_line_size) {
-		case 32:			/* 32 Byte */
-			mips_cache_ops.mco_pdcache_wbinv_all =
-			    r5k_pdcache_wbinv_all_32;
-			mips_cache_ops.mco_pdcache_wbinv_range =
-			    r5k_pdcache_wbinv_range_32;
-			mips_cache_ops.mco_pdcache_wbinv_range_index =
-			    r5k_pdcache_wbinv_range_index_32;
-			mips_cache_ops.mco_pdcache_inv_range =
-			    r5k_pdcache_inv_range_32;
-			mips_cache_ops.mco_pdcache_wb_range =
-			    r10k_pdcache_wb_range;
-			break;
-
-		default:
-			panic("r10k pdcache line size %d",
-			    mips_pdcache_line_size);
-		}
+		mips_cache_ops.mco_icache_sync_all =
+		    r10k_icache_sync_all;
+		mips_cache_ops.mco_icache_sync_range =
+		    r10k_icache_sync_range;
+		mips_cache_ops.mco_icache_sync_range_index =
+		    r10k_icache_sync_range_index;
+		mips_cache_ops.mco_pdcache_wbinv_all =
+		    r10k_pdcache_wbinv_all;
+		mips_cache_ops.mco_pdcache_wbinv_range =
+		    r10k_pdcache_wbinv_range;
+		mips_cache_ops.mco_pdcache_wbinv_range_index =
+		    r10k_pdcache_wbinv_range_index;
+		mips_cache_ops.mco_pdcache_inv_range =
+		    r10k_pdcache_inv_range;
+		mips_cache_ops.mco_pdcache_wb_range =
+		    r10k_pdcache_wb_range;
 		break;
 #endif /* ENABLE_MIPS4_CACHE_R10K */
 #endif /* MIPS3 || MIPS4 */
@@ -768,53 +753,18 @@
 		break;
 #ifdef ENABLE_MIPS4_CACHE_R10K
 	case MIPS_R10000:
-		switch (mips_sdcache_ways) {
-		case 2:
-			switch (mips_sdcache_line_size) {
-			case 64:
-				mips_cache_ops.mco_sdcache_wbinv_all =
-				    r4k_sdcache_wbinv_all_generic;
-				mips_cache_ops.mco_sdcache_wbinv_range =
-				    r4k_sdcache_wbinv_range_generic;
-				mips_cache_ops.mco_sdcache_wbinv_range_index =
-				    r4k_sdcache_wbinv_range_index_generic;
-				mips_cache_ops.mco_sdcache_inv_range =
-				    r4k_sdcache_inv_range_generic;
-				mips_cache_ops.mco_sdcache_wb_range =
-#if 0 /* XXX needs real wb functions for r10k 2way L2 cache */
-				    r4k_sdcache_wb_range_generic;
-#else
-				    r4k_sdcache_wbinv_range_generic;
-#endif
-				break;
-
-			case 128:
-				mips_cache_ops.mco_sdcache_wbinv_all =
-				    r4k_sdcache_wbinv_all_128;
-				mips_cache_ops.mco_sdcache_wbinv_range =
-				    r4k_sdcache_wbinv_range_128;
-				mips_cache_ops.mco_sdcache_wbinv_range_index =
-				    r4k_sdcache_wbinv_range_index_128;
-				mips_cache_ops.mco_sdcache_inv_range =
-				    r4k_sdcache_inv_range_128;
-				mips_cache_ops.mco_sdcache_wb_range =
-#if 0 /* XXX needs real wb functions for r10k 2way L2 cache */
-				    r4k_sdcache_wb_range_128;
-#else
-				    r4k_sdcache_wbinv_range_128;
-#endif
-				break;
-
-			default:
-				panic("r10k sdcache %d way line size %d",
-				    mips_sdcache_ways, mips_sdcache_line_size);
-			}
-			break;
-
-		default:
-			panic("r10k sdcache %d way line size %d",
-			    mips_sdcache_ways, mips_sdcache_line_size);
-		}
+	case MIPS_R12000:
+	case MIPS_R14000:
+		mips_cache_ops.mco_sdcache_wbinv_all =
+		    r10k_sdcache_wbinv_all;
+		mips_cache_ops.mco_sdcache_wbinv_range =
+		    r10k_sdcache_wbinv_range;
+		mips_cache_ops.mco_sdcache_wbinv_range_index =
+		    r10k_sdcache_wbinv_range_index;
+		mips_cache_ops.mco_sdcache_inv_range =
+		    r10k_sdcache_inv_range;
+		mips_cache_ops.mco_sdcache_wb_range =
+		    r10k_sdcache_wb_range;
 		break;
 #endif /* ENABLE_MIPS4_CACHE_R10K */
 #endif /* MIPS3 || MIPS4 */
Index: mips/cache_r10k.c
===================================================================
RCS file: /cvsroot/src/sys/arch/mips/mips/cache_r10k.c,v
retrieving revision 1.1
diff -u -r1.1 cache_r10k.c
--- mips/cache_r10k.c	2003/10/05 11:10:25	1.1
+++ mips/cache_r10k.c	2003/10/25 10:01:15
@@ -1,7 +1,7 @@
-/*	$NetBSD: cache_r10k.c,v 1.1 2003/10/05 11:10:25 tsutsui Exp $	*/
+/*	$NetBSD$	*/
 
-/*
- * Copyright (c) 2003 KIYOHARA Takashi <kiyohara@kk.iij4u.or.jp>
+/*-
+ * Copyright (c) 2003 Takao Shinohara.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -59,53 +59,44 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <sys/cdefs.h>
-
 #include <sys/param.h>
 
 #include <mips/cache.h>
 #include <mips/cache_r4k.h>
-#include <mips/cache_r5k.h>
 #include <mips/cache_r10k.h>
-#include <mips/locore.h>
 
 /*
  * Cache operations for R10000-style caches:
  *
- *	- 2-way set-associative
- *	- Write-back
- *	- Virtually indexed, physically tagged
- *
+ *	2-way, write-back
+ *	primary cache: virtual index/physical tag
+ *	secondary cache: physical index/physical tag
  */
 
-#define	round_line(x)		(((x) + 63) & ~63)
-#define	trunc_line(x)		((x) & ~63)
-
 __asm(".set mips3");
 
+#define	round_line(x)	(((x) + 64 - 1) & ~(64 - 1))
+#define	trunc_line(x)	((x) & ~(64 - 1))
+
 void
-r10k_icache_sync_all_64(void)
+r10k_icache_sync_all(void)
 {
 	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
-	vaddr_t eva = va + mips_picache_size;
-
-	/*
-	 * Since we're hitting the whole thing, we don't have to
-	 * worry about the 2 different "ways".
-	 */
+	vaddr_t eva = va + mips_picache_way_size;
 
 	mips_dcache_wbinv_all();
 
 	__asm __volatile("sync");
 
 	while (va < eva) {
-		cache_r10k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
-		va += (32 * 64);
+		cache_op_r4k_line(va+0, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
+		cache_op_r4k_line(va+1, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
+		va += 64;
 	}
 }
 
 void
-r10k_icache_sync_range_64(vaddr_t va, vsize_t size)
+r10k_icache_sync_range(vaddr_t va, vsize_t size)
 {
 	vaddr_t eva = round_line(va + size);
 
@@ -115,11 +106,6 @@
 
 	__asm __volatile("sync");
 
-	while ((eva - va) >= (32 * 64)) {
-		cache_r10k_op_32lines_64(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
-		va += (32 * 64);
-	}
-
 	while (va < eva) {
 		cache_op_r4k_line(va, CACHE_R4K_I|CACHEOP_R4K_HIT_INV);
 		va += 64;
@@ -127,9 +113,9 @@
 }
 
 void
-r10k_icache_sync_range_index_64(vaddr_t va, vsize_t size)
+r10k_icache_sync_range_index(vaddr_t va, vsize_t size)
 {
-	vaddr_t w2va, eva, orig_va;
+	vaddr_t eva, orig_va;
 
 	orig_va = va;
 
@@ -149,27 +135,183 @@
 	va = MIPS_PHYS_TO_KSEG0(orig_va & mips_picache_way_mask);
 
 	eva = round_line(va + size);
+	va = trunc_line(va);
+
+	while (va < eva) {
+		cache_op_r4k_line(va+0, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
+		cache_op_r4k_line(va+1, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
+		va += 64;
+	}
+}
+
+#undef round_line
+#undef trunc_line
+
+#define	round_line(x)	(((x) + 32 - 1) & ~(32 - 1))
+#define	trunc_line(x)	((x) & ~(32 - 1))
+
+void
+r10k_pdcache_wbinv_all(void)
+{
+	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
+	vaddr_t eva = va + mips_pdcache_way_size;
+
+	while (va < eva) {
+		cache_op_r4k_line(va+0, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
+		cache_op_r4k_line(va+1, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
+		va += 32;
+	}
+}
+
+void
+r10k_pdcache_wbinv_range(vaddr_t va, vsize_t size)
+{
+	vaddr_t eva = round_line(va + size);
+
+	va = trunc_line(va);
+
+	while (va < eva) {
+		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
+		va += 32;
+	}
+}
+
+void
+r10k_pdcache_wbinv_range_index(vaddr_t va, vsize_t size)
+{
+	vaddr_t eva;
+
+	/*
+	 * Since we're doing Index ops, we expect to not be able
+	 * to access the address we've been given.  So, get the
+	 * bits that determine the cache index, and make a KSEG0
+	 * address out of them.
+	 */
+	va = MIPS_PHYS_TO_KSEG0(va & mips_pdcache_way_mask);
+
+	eva = round_line(va + size);
 	va = trunc_line(va);
-	w2va = va + mips_picache_way_size;
 
-	while ((eva - va) >= (16 * 64)) {
-		cache_r10k_op_16lines_64_2way(va, w2va,
-		    CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
-		va   += (16 * 64);
-		w2va += (16 * 64);
+	while (va < eva) {
+		cache_op_r4k_line(va+0, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
+		cache_op_r4k_line(va+1, CACHE_R4K_D|CACHEOP_R4K_INDEX_WB_INV);
+		va += 32;
 	}
+}
 
+void
+r10k_pdcache_inv_range(vaddr_t va, vsize_t size)
+{
+	vaddr_t eva = round_line(va + size);
+
+	va = trunc_line(va);
+
 	while (va < eva) {
-		cache_op_r4k_line(  va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
-		cache_op_r4k_line(w2va, CACHE_R4K_I|CACHEOP_R4K_INDEX_INV);
-		va   += 64;
-		w2va += 64;
+		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_INV);
+		va += 32;
 	}
 }
 
 void
 r10k_pdcache_wb_range(vaddr_t va, vsize_t size)
+{
+	vaddr_t eva = round_line(va + size);
+
+	va = trunc_line(va);
+
+	while (va < eva) {
+		/* R10000 does not support HitWriteBack operation */
+		cache_op_r4k_line(va, CACHE_R4K_D|CACHEOP_R4K_HIT_WB_INV);
+		va += 32;
+	}
+}
+
+#undef round_line
+#undef trunc_line
+
+#define	round_line(x)	(((x) + mips_sdcache_line_size - 1) & ~(mips_sdcache_line_size - 1))
+#define	trunc_line(x)	((x) & ~(mips_sdcache_line_size - 1))
+
+void
+r10k_sdcache_wbinv_all(void)
+{
+	vaddr_t va = MIPS_PHYS_TO_KSEG0(0);
+	vaddr_t eva = va + mips_sdcache_way_size;
+	int line_size = mips_sdcache_line_size;
+
+	while (va < eva) {
+		cache_op_r4k_line(va+0, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
+		cache_op_r4k_line(va+1, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
+		va += line_size;
+	}
+}
+
+void
+r10k_sdcache_wbinv_range(vaddr_t va, vsize_t size)
+{
+	vaddr_t eva = round_line(va + size);
+	int line_size = mips_sdcache_line_size;
+
+	va = trunc_line(va);
+
+	while (va < eva) {
+		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
+		va += line_size;
+	}
+}
+
+void
+r10k_sdcache_wbinv_range_index(vaddr_t va, vsize_t size)
+{
+	vaddr_t eva;
+	int line_size = mips_sdcache_line_size;
+
+	/*
+	 * Since we're doing Index ops, we expect to not be able
+	 * to access the address we've been given.  So, get the
+	 * bits that determine the cache index, and make a KSEG0
+	 * address out of them.
+	 */
+	va = MIPS_PHYS_TO_KSEG0(va & mips_sdcache_way_mask);
+
+	eva = round_line(va + size);
+	va = trunc_line(va);
+
+	while (va < eva) {
+		cache_op_r4k_line(va+0, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
+		cache_op_r4k_line(va+1, CACHE_R4K_SD|CACHEOP_R4K_INDEX_WB_INV);
+		va += line_size;
+	}
+}
+
+void
+r10k_sdcache_inv_range(vaddr_t va, vsize_t size)
+{
+	vaddr_t eva = round_line(va + size);
+	int line_size = mips_sdcache_line_size;
+
+	va = trunc_line(va);
+
+	while (va < eva) {
+		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_INV);
+		va += line_size;
+	}
+}
+
+void
+r10k_sdcache_wb_range(vaddr_t va, vsize_t size)
 {
-	/* R10000 processor does not support */
+	vaddr_t eva = round_line(va + size);
+	int line_size = mips_sdcache_line_size;
+
+	va = trunc_line(va);
+
+	while (va < eva) {
+		/* R10000 does not support HitWriteBack operation */
+		cache_op_r4k_line(va, CACHE_R4K_SD|CACHEOP_R4K_HIT_WB_INV);
+		va += line_size;
+	}
 }
 
+#undef round_line
+#undef trunc_line