Subject: Re: Re: sleep sleeps forever (again)
To: Steve Woodford <scw@netbsd.org>
From: Joel CARNAT <joel@carnat.net>
List: port-sparc64
Date: 09/24/2004 17:06:12
My lovely U10 just froze (again) when compiling nget.
My kernel was freshly cvsed up, plus I applied your patch.

I don't know how useful it could be, but the last info systat gave me
(I had "systat vmstat 5" running in an ssh connection) was:

****************************************
    3 users    Load  1.49  1.38  1.23                  Fri Sep 24 16:42:39

Proc:r  d  s  w    Csw   Trp   Sys  Int  Sof   Flt            PAGING   SWAPPING
     1     2        22   111    59  206        191            in  out   in  out
                                                      ops           4         1
   0.4% Sy  99.6% Us   0.0% Ni   0.0% In   0.0% Id    pages        29
|    |    |    |    |    |    |    |    |    |    |
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>                        forks
                                                                          fkppw
          memory totals (in kB)              206 Interrupts               fksvm
         real   virtual    free              100 intr lev1                pwait
Active 141904    157280    3600                5 intr lev5                relck
All    244256    259632  512880                1 intr lev6                rlkok
                                             100 intr clock               noram
Namei         Sys-cache     Proc-cache                                    ndcpy
    Calls     hits    %     hits     %                                    fltcp
        6        6  100                                                94 zfod
                                                                          cow
Disks:   md0   wd0   cd0                                               32 fmin
 seeks                                                                 42 ftarg
 xfers           5                                                   9079 itarg
 bytes        263K                                                     35 wired
 %busy         1.4                                                        pdfre
                                                                       30 pdscn
 Sys-cache     Proc-cache                                    ndcpy
    Calls     hits    %     hits     %                                    fltcp
        6        6  100                                                94 zfod
                                                                          cow
Disks:   md0   wd0   cd0                                               32 fmin
 seeks                                                                 42 ftarg
 xfers           5                                                   9079 itarg
 bytes        263K                                                     35 wired
 %busy         1.4                                                        pdfre
                                                                       30 pdscn
Write failed: Host is downery 5 seconds.
****************************************

As usual, nothing shows up on the minicom/console, and nothing is possible
except a hard power off/on.

On Thu, Sep 23 2004 - 22:00, Steve Woodford wrote:
> On Thursday 23 September 2004 08:43, matthew green wrote:
> > my ultra10/440 experienced the sleep forever bug twice a couple of
> > weeks ago.  it happens on any sparc64 box.  it seems to be more
> > likely to occur if the disk and network are busy (eg, both the
> > above happened when i started writing and read heavily over NFS
> > at the same time.)
> 
> Just on a whim, can someone try out the attached patch to 
> sys/arch/sparc64/include/psl.h to see if it cures the sleep forever 
> bug?
> 
> There's a chance that gcc is reordering instructions around some spl* 
> calls. The patch should address this.
> 
> Cheers, Steve

> Index: psl.h
> ===================================================================
> RCS file: /cvsroot/src/sys/arch/sparc64/include/psl.h,v
> retrieving revision 1.26
> diff -u -r1.26 psl.h
> --- psl.h	14 Mar 2004 18:18:54 -0000	1.26
> +++ psl.h	23 Sep 2004 20:52:31 -0000
> @@ -303,6 +303,7 @@
>  {
>  	int pstate = getpstate();
>  
> +	__insn_barrier();
>  	setpstate(pstate & ~PSTATE_IE);
>  	return (pstate);
>  }
> @@ -311,6 +312,7 @@
>  intr_restore(int pstate)
>  {
>  	setpstate(pstate);
> +	__insn_barrier();
>  }
>  
>  /*
> @@ -334,9 +336,11 @@
>  static __inline int name##X(const char* file, int line) \
>  { \
>  	int oldpil; \
> +	__insn_barrier(); \
>  	__asm __volatile("rdpr %%pil,%0" : "=r" (oldpil)); \
>  	SPLPRINT(("{%s:%d %d=>%d}", file, line, oldpil, newpil)); \
>  	__asm __volatile("wrpr %%g0,%0,%%pil" : : "n" (newpil)); \
> +	__insn_barrier(); \
>  	return (oldpil); \
>  }
>  /* A non-priority-decreasing version of SPL */
> @@ -345,11 +349,13 @@
>  static __inline int name##X(const char* file, int line) \
>  { \
>  	int oldpil; \
> +	__insn_barrier(); \
>  	__asm __volatile("rdpr %%pil,%0" : "=r" (oldpil)); \
> -	if (newpil <= oldpil) \
> -		return oldpil; \
> -	SPLPRINT(("{%s:%d %d->!d}", file, line, oldpil, newpil)); \
> -	__asm __volatile("wrpr %%g0,%0,%%pil" : : "n" (newpil)); \
> +	if (newpil > oldpil) {\
> +		SPLPRINT(("{%s:%d %d->!d}", file, line, oldpil, newpil)); \
> +		__asm __volatile("wrpr %%g0,%0,%%pil" : : "n" (newpil)); \
> +	} \
> +	__insn_barrier(); \
>  	return (oldpil); \
>  }
>  
> @@ -360,8 +366,10 @@
>  static __inline int name() \
>  { \
>  	int oldpil; \
> +	__insn_barrier(); \
>  	__asm __volatile("rdpr %%pil,%0" : "=r" (oldpil)); \
>  	__asm __volatile("wrpr %%g0,%0,%%pil" : : "n" (newpil)); \
> +	__insn_barrier(); \
>  	return (oldpil); \
>  }
>  /* A non-priority-decreasing version of SPL */
> @@ -370,10 +378,11 @@
>  static __inline int name() \
>  { \
>  	int oldpil; \
> +	__insn_barrier(); \
>  	__asm __volatile("rdpr %%pil,%0" : "=r" (oldpil)); \
> -	if (newpil <= oldpil) \
> -		return oldpil; \
> -	__asm __volatile("wrpr %%g0,%0,%%pil" : : "n" (newpil)); \
> +	if (newpil > oldpil) \
> +		__asm __volatile("wrpr %%g0,%0,%%pil" : : "n" (newpil)); \
> +	__insn_barrier(); \
>  	return (oldpil); \
>  }
>  #endif
> @@ -474,7 +483,9 @@
>  	__asm __volatile("rdpr %%pil,%0" : "=r" (pil));
>  	SPLPRINT(("{%d->%d}", pil, newpil));
>  #endif
> +	__insn_barrier();
>  	__asm __volatile("wrpr %%g0,%0,%%pil" : : "rn" (newpil));
> +	__insn_barrier();
>  }
>  #endif /* KERNEL && !_LOCORE */
>  


-- 
,-- This mail runs ---------.
`------------ NetBSD/i386 --'