Subject: Re: 1.5S vs sparc/MP
To: None <tech-smp@netbsd.org>
From: Simon J. Gerraty <sjg@quick.com.au>
List: tech-smp
Date: 02/26/2001 02:36:35
OK, I decided to try my idea for avoiding the panic seen on MP
hypersparcs.  I added semaphores to the kernel, and added the
appropriate operations to the sparc cache flush routines.

It no longer panics; however, I now reliably get a Watchdog Reset,
which I used to see on perhaps one boot in 5 (it would panic the other
4 times).  Not sure if I've made progress or not.

Ok, so I added some ugly printfs to the semaphore operations.
Interestingly, the cache flush routines get called many times before
cpu0 is attached (73 times in fact):

>> NetBSD/sparc Secondary Boot, Revision 1.9
>> (root@flambard, Thu Nov 23 03:02:02 MET 2000)
Booting netbsd.mp
2299870+118136+331088 [68+133072+104831]=0x2e9600
OBP version 3, revision 2.25 (plugin rev 2)
sema_v(0xf02b0f9c) == 1
sema_v(0xf02b0f9c) == 2
sema_v(0xf02b0f9c) == 3
sema_v(0xf02b0f9c) == 4
....
sema_v(0xf02b0f9c) == 72
[ using 238424 bytes of netbsd ELF symbol table ]
Copyright (c) 1996, 1997, 1998, 1999, 2000, 2001
    The NetBSD Foundation, Inc.  All rights reserved.
Copyright (c) 1982, 1986, 1989, 1991, 1993
    The Regents of the University of California.  All rights reserved.

NetBSD 1.5S (TOO.MP) #3: Mon Feb 26 02:14:44 PST 2001
    root@too:/l0/NetBSD/obj/sys/compile/TOO.MP
total memory = 159 MB
avail memory = 144 MB
using 896 buffers containing 8264 KB of memory
sema_v(0xf02b0f9c) == 73
bootpath: /iommu@f,e0000000/sbus@f,e0001000/espdma@f,400000/esp@f,800000/sd@3,0
mainbus0 (root): SUNW,SPARCstation-20
cpu0 at mainbus0sema_init(0xf02b0f9c, 0, cache_flush)
: mid 8sema_v(0xf02b0f9c) == 1
: RT620/625 @ 100 MHz, on-chip FPU
cpu0: 256K byte write-back, 64 bytes/line, sw flush: cache enabled
cpu1 at mainbus0sema_v(0xf02b0f9c) == 2
: mid 9sema_v(0xf02b0f9c) == 3
sema_v(0xf02b0f9c) == 4

Watchdog Reset
Type  help  for more information
<#1> ok 

We never got as far as calling sema_p() before the watchdog reset
occurred, probably because we are still using only the sp cache flush
routines up to this point.  Without the printfs, the boot reliably
gets to:

SUNW,DBRIe at sbus0 slot 14 offset 0x10000 level 9 not configured
qec0 at sbus0 slot 2 offset 0x20000 level 4 (ipl 7): 128K memory
qe0 at qec0 slot 0 offset 0x0 rev 1 address 08:00:20:72:58:20
qe1 at qec0 slot 1 offset 0x0 rev 1 address 08:00:20:72:58:20
qe2 at qec0 slot 2 offset 0x0 rev 1 address 08:00:20:72:58:20
qe3 at qec0 slot 3 offset 0x0 rev 1 address 08:00:20:72:58:20
eccmemctl0 at mainbus0: version 0x0/0x2
scsibus0: waiting 2 seconds for devices to settle...
esp0: target 3: sync negotiation
sd0 at scsibus0 target 3 lun 0: <SEAGATE, ST34371W SUN4.2G, 7462> SCSI2 0/direct fixed
sd0(esp0:3:0): max sync rate 8.06MB/s
sd0: 4094 MB, 3882 cyl, 16 head, 135 sec, 512 bytes/sect x 8385121 sectors
IPsec: Initialized Security Association Processing.
root on sd0a dumps on sd0b
root file system type: ffs
Asyn 

Watchdog Reset

here, which is where it used to reliably panic.
I'm guessing that the semaphore stuff is not the cause of the 
"Watchdog Reset"... 

Here are the diffs to sparc/cache.c etc. if anyone cares to comment.
The idea is that the sp cache routines all do a V() when they are
done, and the smp cache routines call a routine at the end which
should do an appropriate number of P()'s to ensure that all CPUs have
done their flush before returning.

BTW, I've only targeted the routines that the hypersparc uses for now.

--sjg

Index: sparc/cache.c
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/sparc/sparc/cache.c,v
retrieving revision 1.53
diff -u -p -r1.53 cache.c
--- sparc/cache.c	2000/06/08 14:45:18	1.53
+++ sparc/cache.c	2001/02/26 09:54:54
@@ -70,6 +70,12 @@
 #include <sparc/sparc/cache.h>
 #include <sparc/sparc/cpuvar.h>
 
+#ifdef MULTIPROCESSOR
+# include <sys/ksem.h>
+
+sema_t cache_semaphore;
+#endif
+
 struct cachestats cachestats;
 
 int cache_alias_dist;		/* Cache anti-aliasing constants */
@@ -537,6 +543,9 @@ srmmu_vcache_flush_context()
 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
 	for (; --i >= 0; p += ls)
 		sta(p, ASI_IDCACHELFC, 0);
+#ifdef MULTIPROCESSOR
+	sema_v(&cache_semaphore);
+#endif
 }
 
 /*
@@ -559,6 +568,9 @@ srmmu_vcache_flush_region(vreg)
 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
 	for (; --i >= 0; p += ls)
 		sta(p, ASI_IDCACHELFR, 0);
+#ifdef MULTIPROCESSOR
+	sema_v(&cache_semaphore);
+#endif
 }
 
 /*
@@ -583,6 +595,9 @@ srmmu_vcache_flush_segment(vreg, vseg)
 	i = CACHEINFO.c_totalsize >> CACHEINFO.c_l2linesize;
 	for (; --i >= 0; p += ls)
 		sta(p, ASI_IDCACHELFS, 0);
+#ifdef MULTIPROCESSOR
+	sema_v(&cache_semaphore);
+#endif
 }
 
 /*
@@ -608,6 +623,9 @@ srmmu_vcache_flush_page(va)
 	i = NBPG >> CACHEINFO.c_l2linesize;
 	for (; --i >= 0; p += ls)
 		sta(p, ASI_IDCACHELFP, 0);
+#ifdef MULTIPROCESSOR
+	sema_v(&cache_semaphore);
+#endif
 }
 
 /*
@@ -645,6 +663,9 @@ srmmu_cache_flush(base, len)
 		p = (char *)((int)base & -ls);
 		for (; --i >= 0; p += ls)
 			sta(p, ASI_IDCACHELFP, 0);
+#ifdef MULTIPROCESSOR
+		sema_v(&cache_semaphore);
+#endif
 		return;
 	}
 
@@ -681,6 +702,9 @@ srmmu_cache_flush(base, len)
 		i <<= PGSHIFT - CACHEINFO.c_l2linesize;
 		for (; --i >= 0; p += ls)
 			sta(p, ASI_IDCACHELFP, 0);
+#ifdef MULTIPROCESSOR
+		sema_v(&cache_semaphore);
+#endif
 		return;
 	}
 	baseoff = (u_int)base & SGOFSET;
@@ -911,6 +935,28 @@ viking_pcache_flush_page(pa, invalidate_
 
 #if defined(MULTIPROCESSOR)
 /*
+ * Set up cache_semaphore; called by the boot cpu, before spinning up the rest.
+ */
+
+void
+cache_sema_init (void)
+{
+	sema_init(&cache_semaphore, 0, "cache_flush");	/* 0: presumably the initial count -- confirm */
+}
+
+static void
+cache_sema_wait (void)	/* called at the end of each smp_*flush* routine in this patch */
+{
+	int n;
+	
+	for (n = 0; n < ncpu; n++) {	/* one sema_p() per non-NULL cpus[] entry */
+		if (cpus[n] == NULL)	/* empty slot: no P() for it */
+			continue;
+		sema_p(&cache_semaphore);	/* pairs with the sema_v() at the end of the sp flush routines */
+	}
+}
+
+/*
  * Cache flushing on multi-processor systems involves sending
  * inter-processor messages to flush the cache on each module.
  *
@@ -940,6 +986,7 @@ smp_vcache_flush_page(va)
 		raise_ipi(cpi);
 		splx(s);
 	}
+	cache_sema_wait();
 }
 
 void
@@ -963,6 +1010,7 @@ smp_vcache_flush_segment(vr, vs)
 		raise_ipi(cpi);
 		splx(s);
 	}
+	cache_sema_wait();
 }
 
 void
@@ -985,6 +1033,7 @@ smp_vcache_flush_region(vr)
 		raise_ipi(cpi);
 		splx(s);
 	}
+	cache_sema_wait();
 }
 
 void
@@ -1005,6 +1054,7 @@ smp_vcache_flush_context()
 		raise_ipi(cpi);
 		splx(s);
 	}
+	cache_sema_wait();
 }
 
 void
@@ -1029,5 +1079,6 @@ smp_cache_flush(va, size)
 		raise_ipi(cpi);
 		splx(s);
 	}
+	cache_sema_wait();
 }
 #endif /* MULTIPROCESSOR */
Index: sparc/cache.h
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/sparc/sparc/cache.h,v
retrieving revision 1.22
diff -u -p -r1.22 cache.h
--- sparc/cache.h	2000/06/06 07:56:40	1.22
+++ sparc/cache.h	2001/02/26 09:54:54
@@ -203,6 +203,7 @@ extern void sparc_noop __P((void));
  * The SMP versions of the cache flush functions. These functions
  * send a "cache flush" message to each processor.
  */
+void	cache_sema_init __P((void));
 void	smp_vcache_flush_context __P((void));	/* flush current context */
 void	smp_vcache_flush_region __P((int));	/* flush region in cur ctx */
 void	smp_vcache_flush_segment __P((int, int));/* flush seg in cur ctx */
Index: sparc/cpu.c
===================================================================
RCS file: /cvsroot/syssrc/sys/arch/sparc/sparc/cpu.c,v
retrieving revision 1.110
diff -u -p -r1.110 cpu.c
--- sparc/cpu.c	2001/01/21 07:48:30	1.110
+++ sparc/cpu.c	2001/02/26 09:54:54
@@ -295,6 +295,8 @@ static	int cpu_instance;
 		getcpuinfo(&cpuinfo, node);
 
 #if defined(MULTIPROCESSOR)
+		cache_sema_init();
+		
 		/*
 		 * Allocate a suitable global VA for the boot CPU's
 		 * cpu_info (which is already statically allocated),