NetBSD-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
toolchain/50112: GCC is broken on NetBSD/Alpha Current,7-0-RC2,7-0-RC1
>Number: 50112
>Category: toolchain
>Synopsis: GCC is broken on NetBSD/Alpha Current,7-0-RC2,7-0-RC1
>Confidential: no
>Severity: critical
>Priority: high
>Responsible: toolchain-manager
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Sat Aug 01 15:10:00 +0000 2015
>Originator: nullnilaki
>Release: NetBSD/Alpha Current,7-0-RC2,7-0-RC1
>Organization:
japan
>Environment:
NetBSD 7.99.20 NetBSD 7.99.20 (GENERIC-$Revision: 1.364 $) #1: Sat Aug 1 12:15:01 UTC 2015 naruaki@:/usr/obj.alpha/sys/arch/alpha/compile/GENERIC alpha
#
>Description:
I compile this code.
(See Himeno benchmark http://accc.riken.jp/2444.htm)
>----------------------------------------------------<
/********************************************************************
This benchmark test program is measuring a cpu performance
of floating point operation by a Poisson equation solver.
If you have any question, please ask me via email.
written by Ryutaro HIMENO, November 26, 2001.
Version 3.0
----------------------------------------------
Ryutaro Himeno, Dr. of Eng.
Head of Computer Information Division,
RIKEN (The Institute of Pysical and Chemical Research)
Email : himeno%postman.riken.go.jp@localhost
---------------------------------------------------------------
You can adjust the size of this benchmark code to fit your target
computer. In that case, please chose following sets of
(mimax,mjmax,mkmax):
small : 33,33,65
small : 65,65,129
midium: 129,129,257
large : 257,257,513
ext.large: 513,513,1025
This program is to measure a computer performance in MFLOPS
by using a kernel which appears in a linear solver of pressure
Poisson eq. which appears in an incompressible Navier-Stokes solver.
A point-Jacobi method is employed in this solver as this method can
be easyly vectrized and be parallelized.
------------------
Finite-difference method, curvilinear coodinate system
Vectorizable and parallelizable on each grid point
No. of grid points : imax x jmax x kmax including boundaries
------------------
A,B,C:coefficient matrix, wrk1: source term of Poisson equation
wrk2 : working area, OMEGA : relaxation parameter
BND:control variable for boundaries and objects ( = 0 or 1)
P: pressure
********************************************************************/
#include <stdio.h>
#define MIMAX 65
#define MJMAX 65
#define MKMAX 129
double second();
float jacobi();
void initmt();
double fflop(int,int,int);
double mflops(int,double,double);
static float p[MIMAX][MJMAX][MKMAX];
static float a[4][MIMAX][MJMAX][MKMAX],
b[3][MIMAX][MJMAX][MKMAX],
c[3][MIMAX][MJMAX][MKMAX];
static float bnd[MIMAX][MJMAX][MKMAX];
static float wrk1[MIMAX][MJMAX][MKMAX],
wrk2[MIMAX][MJMAX][MKMAX];
static int imax, jmax, kmax;
static float omega;
int
main()
{
int i,j,k,nn;
float gosa;
double cpu,cpu0,cpu1,flop,target;
target= 60.0;
omega= 0.8;
imax = MIMAX-1;
jmax = MJMAX-1;
kmax = MKMAX-1;
/*
* Initializing matrixes
*/
initmt();
printf("mimax = %d mjmax = %d mkmax = %d\n",MIMAX, MJMAX, MKMAX);
printf("imax = %d jmax = %d kmax =%d\n",imax,jmax,kmax);
nn= 3;
printf(" Start rehearsal measurement process.\n");
printf(" Measure the performance in %d times.\n\n",nn);
cpu0= second();
gosa= jacobi(nn);
cpu1= second();
cpu= cpu1 - cpu0;
flop= fflop(imax,jmax,kmax);
printf(" MFLOPS: %f time(s): %f %e\n\n",
mflops(nn,cpu,flop),cpu,gosa);
nn= (int)(target/(cpu/3.0));
printf(" Now, start the actual measurement process.\n");
printf(" The loop will be excuted in %d times\n",nn);
printf(" This will take about one minute.\n");
printf(" Wait for a while\n\n");
/*
* Start measuring
*/
cpu0 = second();
gosa = jacobi(nn);
cpu1 = second();
cpu= cpu1 - cpu0;
printf(" Loop executed for %d times\n",nn);
printf(" Gosa : %e \n",gosa);
printf(" MFLOPS measured : %f\tcpu : %f\n",mflops(nn,cpu,flop),cpu);
printf(" Score based on Pentium III 600MHz : %f\n",
mflops(nn,cpu,flop)/82,84);
return (0);
}
void
initmt()
{
int i,j,k;
for(i=0 ; i<MIMAX ; i++)
for(j=0 ; j<MJMAX ; j++)
for(k=0 ; k<MKMAX ; k++){
a[0][i][j][k]=0.0;
a[1][i][j][k]=0.0;
a[2][i][j][k]=0.0;
a[3][i][j][k]=0.0;
b[0][i][j][k]=0.0;
b[1][i][j][k]=0.0;
b[2][i][j][k]=0.0;
c[0][i][j][k]=0.0;
c[1][i][j][k]=0.0;
c[2][i][j][k]=0.0;
p[i][j][k]=0.0;
wrk1[i][j][k]=0.0;
bnd[i][j][k]=0.0;
}
for(i=0 ; i<imax ; i++)
for(j=0 ; j<jmax ; j++)
for(k=0 ; k<kmax ; k++){
a[0][i][j][k]=1.0;
a[1][i][j][k]=1.0;
a[2][i][j][k]=1.0;
a[3][i][j][k]=1.0/6.0;
b[0][i][j][k]=0.0;
b[1][i][j][k]=0.0;
b[2][i][j][k]=0.0;
c[0][i][j][k]=1.0;
c[1][i][j][k]=1.0;
c[2][i][j][k]=1.0;
p[i][j][k]=(float)(i*i)/(float)((imax-1)*(imax-1));
wrk1[i][j][k]=0.0;
bnd[i][j][k]=1.0;
}
}
float
jacobi(int nn)
{
int i,j,k,n;
float gosa, s0, ss;
for(n=0 ; n<nn ; ++n){
gosa = 0.0;
for(i=1 ; i<imax-1 ; i++)
for(j=1 ; j<jmax-1 ; j++)
for(k=1 ; k<kmax-1 ; k++){
s0 = a[0][i][j][k] * p[i+1][j ][k ]
+ a[1][i][j][k] * p[i ][j+1][k ]
+ a[2][i][j][k] * p[i ][j ][k+1]
+ b[0][i][j][k] * ( p[i+1][j+1][k ] - p[i+1][j-1][k ]
- p[i-1][j+1][k ] + p[i-1][j-1][k ] )
+ b[1][i][j][k] * ( p[i ][j+1][k+1] - p[i ][j-1][k+1]
- p[i ][j+1][k-1] + p[i ][j-1][k-1] )
+ b[2][i][j][k] * ( p[i+1][j ][k+1] - p[i-1][j ][k+1]
- p[i+1][j ][k-1] + p[i-1][j ][k-1] )
+ c[0][i][j][k] * p[i-1][j ][k ]
+ c[1][i][j][k] * p[i ][j-1][k ]
+ c[2][i][j][k] * p[i ][j ][k-1]
+ wrk1[i][j][k];
ss = ( s0 * a[3][i][j][k] - p[i][j][k] ) * bnd[i][j][k];
gosa+= ss*ss;
/* gosa= (gosa > ss*ss) ? a : b; */
wrk2[i][j][k] = p[i][j][k] + omega * ss;
}
for(i=1 ; i<imax-1 ; ++i)
for(j=1 ; j<jmax-1 ; ++j)
for(k=1 ; k<kmax-1 ; ++k)
p[i][j][k] = wrk2[i][j][k];
} /* end n loop */
return(gosa);
}
double
fflop(int mx,int my, int mz)
{
return((double)(mz-2)*(double)(my-2)*(double)(mx-2)*34.0);
}
double
mflops(int nn,double cpu,double flop)
{
return(flop/cpu*1.e-6*(double)nn);
}
#include <sys/time.h>
double
second()
{
struct timeval tm;
double t ;
static int base_sec = 0,base_usec = 0;
gettimeofday(&tm, NULL);
if(base_sec == 0 && base_usec == 0)
{
base_sec = tm.tv_sec;
base_usec = tm.tv_usec;
t = 0.0;
} else {
t = (double) (tm.tv_sec-base_sec) +
((double) (tm.tv_usec-base_usec))/1.0e6 ;
}
return t ;
}
>----------------------------------------------------<
This is the result on NetBSD/Alpha Current,7-0-RC2,7-0-RC1.
MFLOPS measured : 0.000000 is wrong.
>----------------------------------------------------<
# uname -a
NetBSD 7.99.20 NetBSD 7.99.20 (GENERIC-$Revision: 1.364 $) #1: Sat Aug 1 12:15:01 UTC 2015 naruaki@:/usr/obj.alpha/sys/arch/alpha/compile/GENERIC alpha
# gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/lto-wrapper
Target: alpha--netbsd
Configured with: /usr/src/tools/gcc/../../external/gpl3/gcc/dist/configure --target=alpha--netbsd --enable-long-long --enable-threads --with-bugurl=http://www.NetBSD.org/Misc/send-pr.html --with-pkgversion='NetBSD nb2 20150115' --with-system-zlib --enable-__cxa_atexit --enable-libstdcxx-threads --enable-libstdcxx-time=rt --enable-lto --with-mpc-lib=/var/obj/mknative/alpha/usr/src/external/lgpl3/mpc/lib/libmpc --with-mpfr-lib=/var/obj/mknative/alpha/usr/src/external/lgpl3/mpfr/lib/libmpfr --with-gmp-lib=/var/obj/mknative/alpha/usr/src/external/lgpl3/gmp/lib/libgmp --with-mpc-include=/usr/src/external/lgpl3/mpc/dist/src --with-mpfr-include=/usr/src/external/lgpl3/mpfr/dist/src --with-gmp-include=/usr/src/external/lgpl3/gmp/lib/libgmp/arch/alpha --enable-tls --disable-multilib --disable-symvers --disable-libstdcxx-pch --build=x86_64-unknown-netbsd6.0. --host=alpha--netbsd --with-sysroot=/var/obj/mknative/alpha/usr/src/destdir.alpha
Thread model: posix
gcc version 4.8.5 (nb2 20150115)
# gcc benchmark.c -o benchmark
# ./benchmark
mimax = 65 mjmax = 65 mkmax = 129
imax = 64 jmax = 64 kmax =128
Start rehearsal measurement process.
Measure the performance in 3 times.
MFLOPS: 0.000000 time(s): 4294967296.822944 3.288628e-03
Now, start the actual measurement process.
The loop will be excuted in 0 times
This will take about one minute.
Wait for a while
Loop executed for 0 times
Gosa : 1.396971e-309
MFLOPS measured : 0.000000 cpu : 0.000002
Score based on Pentium III 600MHz : 0.000000
>----------------------------------------------------<
This is the result on Tru64 UNIX.
MFLOPS measured : 60.943671 is good.
>----------------------------------------------------<
s15> uname -a
OSF1 ds15 V5.1 2650 alpha
ds15> /usr/users/naruaki/gcc/local/gcc4/bin/gcc -v
Using built-in specs.
Target: alpha-dec-osf5.1b
Configured with: ../gcc-4.2.3/configure --prefix=/usr/local/gcc4 --enable-languages=c,c++ --enable-threads=posix --disable-nls --host=alpha-dec-osf5.1b --without-gnu-ld --with-ld=/usr/ccs/bin/ld --without-gnu-as --with-as=/usr/bin/as --disable-libssp
Thread model: posix
gcc version 4.2.3
ds15> /usr/users/naruaki/gcc/local/gcc4/bin/gcc benchmark.c -o benchmark
ds15> ./benchmark
mimax = 65 mjmax = 65 mkmax = 129
imax = 64 jmax = 64 kmax =128
Start rehearsal measurement process.
Measure the performance in 3 times.
MFLOPS: 60.657997 time(s): 0.814453 3.288628e-03
Now, start the actual measurement process.
The loop will be excuted in 221 times
This will take about one minute.
Wait for a while
Loop executed for 221 times
Gosa : 1.604803e-03
MFLOPS measured : 60.943671 cpu : 59.716797
Score based on Pentium III 600MHz : 0.743215
>----------------------------------------------------<
I compile(gcc -S) the code on Tru64 UNIX.
>----------------------------------------------------<
ds15> uname -a
OSF1 ds15 V5.1 2650 alpha
ds15> /usr/users/naruaki/gcc/local/gcc4/bin/gcc -S benchmark.c
ds15> more benchmark.s
.file 1 "benchmark.c"
.verstamp 3 11
.set noreorder
.set volatile
.set noat
.arch ev4
.rdata
$LC2:
.ascii "mimax = %d mjmax = %d mkmax = %d\12\0"
$LC3:
.ascii "imax = %d jmax = %d kmax =%d\12\0"
$LC4:
.ascii " Start rehearsal measurement process.\0"
$LC5:
.ascii " Measure the performance in %d times.\12\12\0"
$LC6:
.ascii " MFLOPS: %f time(s): %f %e\12\12\0"
$LC8:
.ascii " Now, start the actual measurement process.\0"
$LC9:
.ascii " The loop will be excuted in %d times\12\0"
$LC10:
.ascii " This will take about one minute.\0"
$LC11:
.ascii " Wait for a while\12\0"
$LC12:
.ascii " Loop executed for %d times\12\0"
$LC13:
.ascii " Gosa : %e \12\0"
$LC14:
.ascii " MFLOPS measured : %f\11cpu : %f\12\0"
$LC16:
.ascii " Score based on Pentium III 600MHz : %f\12\0"
.align 3
$LC0:
.long 0
.long 1078853632
.align 2
$LC1:
.long 1061997773
.align 3
$LC7:
.long 0
.long 1074266112
.align 3
$LC15:
.long 0
.long 1079279616
.text
.align 2
.globl main
.ent main
main:
.frame $15,96,$26,0
.mask 0x4008000,-96
ldgp $29,0($27)
$main..ng:
lda $30,-96($30)
stq $26,0($30)
stq $15,8($30)
bis $31,$30,$15
.prologue 1
lda $1,$LC0
ldt $f10,0($1)
stt $f10,16($15)
lda $2,omega
lda $1,$LC1
lds $f10,0($1)
sts $f10,0($2)
lda $2,imax
lda $1,64($31)
stl $1,0($2)
lda $2,jmax
lda $1,64($31)
stl $1,0($2)
lda $2,kmax
lda $1,128($31)
stl $1,0($2)
jsr $26,initmt
ldgp $29,0($26)
lda $16,$LC2
lda $17,65($31)
lda $18,65($31)
lda $19,129($31)
jsr $26,printf
ldgp $29,0($26)
lda $1,imax
ldl $3,0($1)
lda $1,jmax
ldl $2,0($1)
lda $1,kmax
ldl $1,0($1)
lda $16,$LC3
bis $31,$3,$17
bis $31,$2,$18
bis $31,$1,$19
jsr $26,printf
ldgp $29,0($26)
lda $1,3($31)
stl $1,60($15)
lda $16,$LC4
jsr $26,puts
ldgp $29,0($26)
ldl $1,60($15)
lda $16,$LC5
bis $31,$1,$17
jsr $26,printf
ldgp $29,0($26)
jsr $26,second
ldgp $29,0($26)
cpys $f0,$f0,$f10
stt $f10,40($15)
ldl $1,60($15)
bis $31,$1,$16
jsr $26,jacobi
ldgp $29,0($26)
cpys $f0,$f0,$f10
sts $f10,56($15)
jsr $26,second
ldgp $29,0($26)
cpys $f0,$f0,$f10
stt $f10,32($15)
ldt $f11,32($15)
ldt $f10,40($15)
subt $f11,$f10,$f10
stt $f10,48($15)
lda $1,imax
ldl $2,0($1)
lda $1,jmax
ldl $3,0($1)
lda $1,kmax
ldl $1,0($1)
bis $31,$2,$16
bis $31,$3,$17
bis $31,$1,$18
jsr $26,fflop
ldgp $29,0($26)
cpys $f0,$f0,$f10
stt $f10,24($15)
ldl $1,60($15)
bis $31,$1,$16
ldt $f17,48($15)
ldt $f18,24($15)
jsr $26,mflops
ldgp $29,0($26)
cpys $f0,$f0,$f10
lds $f11,56($15)
lda $16,$LC6
cpys $f10,$f10,$f17
ldt $f18,48($15)
cpys $f11,$f11,$f19
jsr $26,printf
ldgp $29,0($26)
ldt $f11,48($15)
lda $1,$LC7
ldt $f10,0($1)
divt $f11,$f10,$f11
ldt $f10,16($15)
divt $f10,$f11,$f10
cvttqc $f10,$f10
stt $f10,80($15)
ldq $1,80($15)
stl $1,60($15)
lda $16,$LC8
jsr $26,puts
ldgp $29,0($26)
ldl $1,60($15)
lda $16,$LC9
bis $31,$1,$17
jsr $26,printf
ldgp $29,0($26)
lda $16,$LC10
jsr $26,puts
ldgp $29,0($26)
lda $16,$LC11
jsr $26,puts
ldgp $29,0($26)
jsr $26,second
ldgp $29,0($26)
cpys $f0,$f0,$f10
stt $f10,40($15)
ldl $1,60($15)
bis $31,$1,$16
jsr $26,jacobi
ldgp $29,0($26)
cpys $f0,$f0,$f10
sts $f10,56($15)
jsr $26,second
ldgp $29,0($26)
cpys $f0,$f0,$f10
stt $f10,32($15)
ldt $f11,32($15)
ldt $f10,40($15)
subt $f11,$f10,$f10
stt $f10,48($15)
ldl $1,60($15)
lda $16,$LC12
bis $31,$1,$17
jsr $26,printf
ldgp $29,0($26)
lds $f10,56($15)
lda $16,$LC13
cpys $f10,$f10,$f17
jsr $26,printf
ldgp $29,0($26)
ldl $1,60($15)
bis $31,$1,$16
ldt $f17,48($15)
ldt $f18,24($15)
jsr $26,mflops
ldgp $29,0($26)
cpys $f0,$f0,$f10
lda $16,$LC14
cpys $f10,$f10,$f17
ldt $f18,48($15)
jsr $26,printf
ldgp $29,0($26)
ldl $1,60($15)
bis $31,$1,$16
ldt $f17,48($15)
ldt $f18,24($15)
jsr $26,mflops
ldgp $29,0($26)
cpys $f0,$f0,$f11
lda $1,$LC15
ldt $f10,0($1)
divt $f11,$f10,$f10
lda $16,$LC16
cpys $f10,$f10,$f17
lda $18,84($31)
jsr $26,printf
ldgp $29,0($26)
bis $31,$31,$1
bis $31,$1,$0
bis $31,$15,$30
ldq $26,0($30)
ldq $15,8($30)
lda $30,96($30)
ret $31,($26),1
.end main
.rdata
.align 2
$LC17:
.long 1065353216
.align 2
$LC18:
.long 1042983595
.text
.align 2
.globl initmt
.ent initmt
initmt:
.frame $15,48,$26,0
.mask 0x4008000,-48
ldgp $29,0($27)
$initmt..ng:
lda $30,-48($30)
stq $26,0($30)
stq $15,8($30)
bis $31,$30,$15
.prologue 1
stl $31,24($15)
br $31,$L4
$L5:
stl $31,20($15)
br $31,$L6
$L7:
stl $31,16($15)
br $31,$L8
$L9:
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,a
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,a
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,33($31)
lda $1,17412($1)
addq $2,$1,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,a
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,67($31)
lda $1,-30712($1)
addq $2,$1,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,a
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,100($31)
lda $1,-13300($1)
addq $2,$1,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,b
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,b
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,33($31)
lda $1,17412($1)
addq $2,$1,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,b
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,67($31)
lda $1,-30712($1)
addq $2,$1,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,c
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,c
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,33($31)
lda $1,17412($1)
addq $2,$1,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,c
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,67($31)
lda $1,-30712($1)
addq $2,$1,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,wrk1
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,bnd
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
sts $f31,0($1)
ldl $1,16($15)
addl $1,1,$1
stl $1,16($15)
$L8:
ldl $1,16($15)
cmple $1,128,$1
bne $1,$L9
ldl $1,20($15)
addl $1,1,$1
stl $1,20($15)
$L6:
ldl $1,20($15)
cmple $1,64,$1
bne $1,$L7
ldl $1,24($15)
addl $1,1,$1
stl $1,24($15)
$L4:
ldl $1,24($15)
cmple $1,64,$1
bne $1,$L5
stl $31,24($15)
br $31,$L13
$L14:
stl $31,20($15)
br $31,$L15
$L16:
stl $31,16($15)
br $31,$L17
$L18:
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,a
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
lda $1,$LC17
lds $f10,0($1)
sts $f10,0($2)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,a
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,33($31)
lda $1,17412($1)
addq $2,$1,$2
lda $1,$LC17
lds $f10,0($1)
sts $f10,0($2)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,a
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,67($31)
lda $1,-30712($1)
addq $2,$1,$2
lda $1,$LC17
lds $f10,0($1)
sts $f10,0($2)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,a
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,100($31)
lda $1,-13300($1)
addq $2,$1,$2
lda $1,$LC18
lds $f10,0($1)
sts $f10,0($2)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,b
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,b
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,33($31)
lda $1,17412($1)
addq $2,$1,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,b
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,67($31)
lda $1,-30712($1)
addq $2,$1,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,c
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
lda $1,$LC17
lds $f10,0($1)
sts $f10,0($2)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,c
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,33($31)
lda $1,17412($1)
addq $2,$1,$2
lda $1,$LC17
lds $f10,0($1)
sts $f10,0($2)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,c
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,67($31)
lda $1,-30712($1)
addq $2,$1,$2
lda $1,$LC17
lds $f10,0($1)
sts $f10,0($2)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
ldl $2,24($15)
ldl $1,24($15)
mull $2,$1,$1
addl $31,$1,$1
bis $31,$1,$2
stq $2,32($15)
ldt $f10,32($15)
cvtqs $f10,$f11
lda $1,imax
ldl $1,0($1)
subl $1,1,$1
addl $31,$1,$2
lda $1,imax
ldl $1,0($1)
subl $1,1,$1
addl $31,$1,$1
mull $2,$1,$1
addl $31,$1,$1
bis $31,$1,$2
stq $2,32($15)
ldt $f12,32($15)
cvtqs $f12,$f10
divs $f11,$f10,$f10
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
sts $f10,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,wrk1
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
sts $f31,0($1)
ldl $3,24($15)
ldl $4,20($15)
ldl $5,16($15)
lda $6,bnd
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
lda $1,$LC17
lds $f10,0($1)
sts $f10,0($2)
ldl $1,16($15)
addl $1,1,$1
stl $1,16($15)
$L17:
lda $1,kmax
ldl $2,0($1)
ldl $1,16($15)
cmplt $1,$2,$1
bne $1,$L18
ldl $1,20($15)
addl $1,1,$1
stl $1,20($15)
$L15:
lda $1,jmax
ldl $2,0($1)
ldl $1,20($15)
cmplt $1,$2,$1
bne $1,$L16
ldl $1,24($15)
addl $1,1,$1
stl $1,24($15)
$L13:
lda $1,imax
ldl $2,0($1)
ldl $1,24($15)
cmplt $1,$2,$1
bne $1,$L14
bis $31,$15,$30
ldq $26,0($30)
ldq $15,8($30)
lda $30,48($30)
ret $31,($26),1
.end initmt
.align 2
.globl jacobi
.ent jacobi
jacobi:
.frame $15,64,$26,0
.mask 0x4008000,-64
ldgp $29,0($27)
$jacobi..ng:
lda $30,-64($30)
stq $26,0($30)
stq $15,8($30)
bis $31,$30,$15
.prologue 1
bis $31,$16,$1
stl $1,48($15)
stl $31,28($15)
br $31,$L24
$L25:
sts $f31,24($15)
lda $1,1($31)
stl $1,40($15)
br $31,$L26
$L27:
lda $1,1($31)
stl $1,36($15)
br $31,$L28
$L29:
lda $1,1($31)
stl $1,32($15)
br $31,$L30
$L31:
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,a
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f11,0($1)
ldl $1,40($15)
addl $1,1,$1
addl $31,$1,$3
ldl $4,36($15)
ldl $5,32($15)
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
muls $f11,$f10,$f11
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,a
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,33($31)
lda $1,17412($1)
addq $2,$1,$1
lds $f12,0($1)
ldl $3,40($15)
ldl $1,36($15)
addl $1,1,$1
addl $31,$1,$4
ldl $5,32($15)
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
muls $f12,$f10,$f10
adds $f11,$f10,$f11
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,a
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,67($31)
lda $1,-30712($1)
addq $2,$1,$1
lds $f12,0($1)
ldl $3,40($15)
ldl $4,36($15)
ldl $1,32($15)
addl $1,1,$1
addl $31,$1,$5
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
muls $f12,$f10,$f10
adds $f11,$f10,$f11
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,b
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f12,0($1)
ldl $1,40($15)
addl $1,1,$1
addl $31,$1,$3
ldl $1,36($15)
addl $1,1,$1
addl $31,$1,$4
ldl $5,32($15)
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f13,0($1)
ldl $1,40($15)
addl $1,1,$1
addl $31,$1,$3
ldl $1,36($15)
subl $1,1,$1
addl $31,$1,$4
ldl $5,32($15)
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
subs $f13,$f10,$f13
ldl $1,40($15)
subl $1,1,$1
addl $31,$1,$3
ldl $1,36($15)
addl $1,1,$1
addl $31,$1,$4
ldl $5,32($15)
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
subs $f13,$f10,$f13
ldl $1,40($15)
subl $1,1,$1
addl $31,$1,$3
ldl $1,36($15)
subl $1,1,$1
addl $31,$1,$4
ldl $5,32($15)
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
adds $f13,$f10,$f10
muls $f12,$f10,$f10
adds $f11,$f10,$f11
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,b
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,33($31)
lda $1,17412($1)
addq $2,$1,$1
lds $f12,0($1)
ldl $3,40($15)
ldl $1,36($15)
addl $1,1,$1
addl $31,$1,$4
ldl $1,32($15)
addl $1,1,$1
addl $31,$1,$5
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f13,0($1)
ldl $3,40($15)
ldl $1,36($15)
subl $1,1,$1
addl $31,$1,$4
ldl $1,32($15)
addl $1,1,$1
addl $31,$1,$5
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
subs $f13,$f10,$f13
ldl $3,40($15)
ldl $1,36($15)
addl $1,1,$1
addl $31,$1,$4
ldl $1,32($15)
subl $1,1,$1
addl $31,$1,$5
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
subs $f13,$f10,$f13
ldl $3,40($15)
ldl $1,36($15)
subl $1,1,$1
addl $31,$1,$4
ldl $1,32($15)
subl $1,1,$1
addl $31,$1,$5
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
adds $f13,$f10,$f10
muls $f12,$f10,$f10
adds $f11,$f10,$f11
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,b
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,67($31)
lda $1,-30712($1)
addq $2,$1,$1
lds $f12,0($1)
ldl $1,40($15)
addl $1,1,$1
addl $31,$1,$3
ldl $4,36($15)
ldl $1,32($15)
addl $1,1,$1
addl $31,$1,$5
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f13,0($1)
ldl $1,40($15)
subl $1,1,$1
addl $31,$1,$3
ldl $4,36($15)
ldl $1,32($15)
addl $1,1,$1
addl $31,$1,$5
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
subs $f13,$f10,$f13
ldl $1,40($15)
addl $1,1,$1
addl $31,$1,$3
ldl $4,36($15)
ldl $1,32($15)
subl $1,1,$1
addl $31,$1,$5
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
subs $f13,$f10,$f13
ldl $1,40($15)
subl $1,1,$1
addl $31,$1,$3
ldl $4,36($15)
ldl $1,32($15)
subl $1,1,$1
addl $31,$1,$5
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
adds $f13,$f10,$f10
muls $f12,$f10,$f10
adds $f11,$f10,$f11
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,c
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f12,0($1)
ldl $1,40($15)
subl $1,1,$1
addl $31,$1,$3
ldl $4,36($15)
ldl $5,32($15)
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
muls $f12,$f10,$f10
adds $f11,$f10,$f11
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,c
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,33($31)
lda $1,17412($1)
addq $2,$1,$1
lds $f12,0($1)
ldl $3,40($15)
ldl $1,36($15)
subl $1,1,$1
addl $31,$1,$4
ldl $5,32($15)
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
muls $f12,$f10,$f10
adds $f11,$f10,$f11
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,c
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,67($31)
lda $1,-30712($1)
addq $2,$1,$1
lds $f12,0($1)
ldl $3,40($15)
ldl $4,36($15)
ldl $1,32($15)
subl $1,1,$1
addl $31,$1,$5
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
muls $f12,$f10,$f10
adds $f11,$f10,$f11
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,wrk1
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
adds $f11,$f10,$f10
sts $f10,20($15)
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,a
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$2
ldah $1,100($31)
lda $1,-13300($1)
addq $2,$1,$1
lds $f11,0($1)
lds $f10,20($15)
muls $f11,$f10,$f11
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
subs $f11,$f10,$f11
ldl $3,40($15)
ldl $4,36($15)
ldl $5,32($15)
lda $6,bnd
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$5,$1
s4addq $1,0,$1
addq $1,$6,$1
lds $f10,0($1)
muls $f11,$f10,$f10
sts $f10,16($15)
lds $f11,16($15)
lds $f10,16($15)
muls $f11,$f10,$f11
lds $f10,24($15)
adds $f10,$f11,$f10
sts $f10,24($15)
ldl $5,40($15)
ldl $6,36($15)
ldl $7,32($15)
ldl $3,40($15)
ldl $4,36($15)
ldl $8,32($15)
lda $22,p
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$8,$1
s4addq $1,0,$1
addq $1,$22,$1
lds $f12,0($1)
lda $1,omega
lds $f11,0($1)
lds $f10,16($15)
muls $f11,$f10,$f10
adds $f12,$f10,$f10
lda $3,wrk2
bis $31,$5,$1
sll $1,6,$2
addq $2,$5,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$6,$1
sll $1,7,$1
addq $1,$6,$1
addq $2,$1,$1
addq $1,$7,$1
s4addq $1,0,$1
addq $1,$3,$1
sts $f10,0($1)
ldl $1,32($15)
addl $1,1,$1
stl $1,32($15)
$L30:
lda $1,kmax
ldl $1,0($1)
subl $1,1,$1
addl $31,$1,$2
ldl $1,32($15)
cmplt $1,$2,$1
bne $1,$L31
ldl $1,36($15)
addl $1,1,$1
stl $1,36($15)
$L28:
lda $1,jmax
ldl $1,0($1)
subl $1,1,$1
addl $31,$1,$2
ldl $1,36($15)
cmplt $1,$2,$1
bne $1,$L29
ldl $1,40($15)
addl $1,1,$1
stl $1,40($15)
$L26:
lda $1,imax
ldl $1,0($1)
subl $1,1,$1
addl $31,$1,$2
ldl $1,40($15)
cmplt $1,$2,$1
bne $1,$L27
lda $1,1($31)
stl $1,40($15)
br $31,$L35
$L36:
lda $1,1($31)
stl $1,36($15)
br $31,$L37
$L38:
lda $1,1($31)
stl $1,32($15)
br $31,$L39
$L40:
ldl $5,40($15)
ldl $6,36($15)
ldl $7,32($15)
ldl $3,40($15)
ldl $4,36($15)
ldl $8,32($15)
lda $22,wrk2
bis $31,$3,$1
sll $1,6,$2
addq $2,$3,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$4,$1
sll $1,7,$1
addq $1,$4,$1
addq $2,$1,$1
addq $1,$8,$1
s4addq $1,0,$1
addq $1,$22,$1
lds $f10,0($1)
lda $3,p
bis $31,$5,$1
sll $1,6,$2
addq $2,$5,$2
sll $2,7,$1
addq $2,$1,$2
bis $31,$6,$1
sll $1,7,$1
addq $1,$6,$1
addq $2,$1,$1
addq $1,$7,$1
s4addq $1,0,$1
addq $1,$3,$1
sts $f10,0($1)
ldl $1,32($15)
addl $1,1,$1
stl $1,32($15)
$L39:
lda $1,kmax
ldl $1,0($1)
subl $1,1,$1
addl $31,$1,$2
ldl $1,32($15)
cmplt $1,$2,$1
bne $1,$L40
ldl $1,36($15)
addl $1,1,$1
stl $1,36($15)
$L37:
lda $1,jmax
ldl $1,0($1)
subl $1,1,$1
addl $31,$1,$2
ldl $1,36($15)
cmplt $1,$2,$1
bne $1,$L38
ldl $1,40($15)
addl $1,1,$1
stl $1,40($15)
$L35:
lda $1,imax
ldl $1,0($1)
subl $1,1,$1
addl $31,$1,$2
ldl $1,40($15)
cmplt $1,$2,$1
bne $1,$L36
ldl $1,28($15)
addl $1,1,$1
stl $1,28($15)
$L24:
ldl $1,28($15)
ldl $2,48($15)
cmplt $1,$2,$1
bne $1,$L25
lds $f10,24($15)
cpys $f10,$f10,$f0
bis $31,$15,$30
ldq $26,0($30)
ldq $15,8($30)
lda $30,64($30)
ret $31,($26),1
.end jacobi
.rdata
.align 3
$LC19:
.long 0
.long 1078001664
.text
.align 2
.globl fflop
.ent fflop
fflop:
.frame $15,48,$26,0
.mask 0x4008000,-48
ldgp $29,0($27)
$fflop..ng:
lda $30,-48($30)
stq $26,0($30)
stq $15,8($30)
bis $31,$30,$15
.prologue 1
bis $31,$16,$1
bis $31,$17,$2
bis $31,$18,$3
stl $1,16($15)
stl $2,20($15)
stl $3,24($15)
ldl $1,24($15)
subl $1,2,$1
addl $31,$1,$1
bis $31,$1,$2
stq $2,32($15)
ldt $f10,32($15)
cvtqt $f10,$f11
ldl $1,20($15)
subl $1,2,$1
addl $31,$1,$1
bis $31,$1,$2
stq $2,32($15)
ldt $f12,32($15)
cvtqt $f12,$f10
mult $f11,$f10,$f11
ldl $1,16($15)
subl $1,2,$1
addl $31,$1,$1
bis $31,$1,$2
stq $2,32($15)
ldt $f12,32($15)
cvtqt $f12,$f10
mult $f11,$f10,$f11
lda $1,$LC19
ldt $f10,0($1)
mult $f11,$f10,$f10
cpys $f10,$f10,$f0
bis $31,$15,$30
ldq $26,0($30)
ldq $15,8($30)
lda $30,48($30)
ret $31,($26),1
.end fflop
.rdata
.align 3
$LC20:
.long 2696277389
.long 1051772663
.text
.align 2
.globl mflops
.ent mflops
mflops:
.frame $15,64,$26,0
.mask 0x4008000,-64
ldgp $29,0($27)
$mflops..ng:
lda $30,-64($30)
stq $26,0($30)
stq $15,8($30)
bis $31,$30,$15
.prologue 1
bis $31,$16,$1
stt $f17,24($15)
stt $f18,32($15)
stl $1,16($15)
ldt $f11,32($15)
ldt $f10,24($15)
divt $f11,$f10,$f11
lda $1,$LC20
ldt $f10,0($1)
mult $f11,$f10,$f11
ldl $1,16($15)
bis $31,$1,$2
stq $2,48($15)
ldt $f12,48($15)
cvtqt $f12,$f10
mult $f11,$f10,$f10
cpys $f10,$f10,$f0
bis $31,$15,$30
ldq $26,0($30)
ldq $15,8($30)
lda $30,64($30)
ret $31,($26),1
.end mflops
.lcomm base_usec.2681,4
.lcomm base_sec.2680,4
.rdata
.align 3
$LC21:
.long 0
.long 1093567616
.text
.align 2
.globl second
.ent second
second:
.frame $15,48,$26,0
.mask 0x4008000,-48
ldgp $29,0($27)
$second..ng:
lda $30,-48($30)
stq $26,0($30)
stq $15,8($30)
bis $31,$30,$15
.prologue 1
lda $1,24($15)
bis $31,$1,$16
bis $31,$31,$17
jsr $26,gettimeofday
ldgp $29,0($26)
lda $1,base_sec.2680
ldl $1,0($1)
bne $1,$L51
lda $1,base_usec.2681
ldl $1,0($1)
bne $1,$L51
ldl $2,24($15)
lda $1,base_sec.2680
stl $2,0($1)
ldl $2,28($15)
lda $1,base_usec.2681
stl $2,0($1)
stt $f31,16($15)
br $31,$L54
$L51:
ldl $2,24($15)
lda $1,base_sec.2680
ldl $1,0($1)
subl $2,$1,$1
addl $31,$1,$1
bis $31,$1,$2
stq $2,32($15)
ldt $f10,32($15)
cvtqt $f10,$f12
ldl $2,28($15)
lda $1,base_usec.2681
ldl $1,0($1)
subl $2,$1,$1
addl $31,$1,$1
bis $31,$1,$2
stq $2,32($15)
ldt $f10,32($15)
cvtqt $f10,$f11
lda $1,$LC21
ldt $f10,0($1)
divt $f11,$f10,$f10
addt $f12,$f10,$f10
stt $f10,16($15)
$L54:
ldt $f10,16($15)
cpys $f10,$f10,$f0
bis $31,$15,$30
ldq $26,0($30)
ldq $15,8($30)
lda $30,48($30)
ret $31,($26),1
.end second
.lcomm p,2180100
.lcomm a,8720400
.lcomm b,6540300
.lcomm c,6540300
.lcomm bnd,2180100
.lcomm wrk1,2180100
.lcomm wrk2,2180100
.lcomm imax,4
.lcomm jmax,4
.lcomm kmax,4
.lcomm omega,4
ds15>
>----------------------------------------------------<
I compile benchmark.s on NetBSD/Alpha.
>----------------------------------------------------<
# uname -a
NetBSD 7.99.20 NetBSD 7.99.20 (GENERIC-$Revision: 1.364 $) #1: Sat Aug 1 12:15:01 UTC 2015 naruaki@:/usr/obj.alpha/sys/arch/alpha/compile/GENERIC alpha
# gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/lto-wrapper
Target: alpha--netbsd
Configured with: /usr/src/tools/gcc/../../external/gpl3/gcc/dist/configure --target=alpha--netbsd --enable-long-long --enable-threads --with-bugurl=http://www.NetBSD.org/Misc/send-pr.html --with-pkgversion='NetBSD nb2 20150115' --with-system-zlib --enable-__cxa_atexit --enable-libstdcxx-threads --enable-libstdcxx-time=rt --enable-lto --with-mpc-lib=/var/obj/mknative/alpha/usr/src/external/lgpl3/mpc/lib/libmpc --with-mpfr-lib=/var/obj/mknative/alpha/usr/src/external/lgpl3/mpfr/lib/libmpfr --with-gmp-lib=/var/obj/mknative/alpha/usr/src/external/lgpl3/gmp/lib/libgmp --with-mpc-include=/usr/src/external/lgpl3/mpc/dist/src --with-mpfr-include=/usr/src/external/lgpl3/mpfr/dist/src --with-gmp-include=/usr/src/external/lgpl3/gmp/lib/libgmp/arch/alpha --enable-tls --disable-multilib --disable-symvers --disable-libstdcxx-pch --build=x86_64-unknown-netbsd6.0. --host=alpha--netbsd --with-sysroot=/var/obj/mknative/alpha/usr/src/destdir.alpha
Thread model: posix
gcc version 4.8.5 (nb2 20150115)
# ls -l benchmark.s
-rw-r--r-- 1 root wheel 28656 Aug 1 2015 benchmark.s
# gcc benchmark.s -o benchmark
benchmark.s: Assembler messages:
benchmark.s:2: Error: unknown pseudo-op: `.verstamp'
benchmark.s:7: Error: unknown pseudo-op: `.rdata'
benchmark.s:241: Error: unknown pseudo-op: `.rdata'
benchmark.s:1702: Error: unknown pseudo-op: `.rdata'
benchmark.s:1760: Error: unknown pseudo-op: `.rdata'
benchmark.s:1804: Error: unknown pseudo-op: `.rdata'
# vi benchmark.s
(delete line 2, line 7, line 241, line 1702, line 1760, line 1804)
# gcc benchmark.s -o benchmark
# ./benchmark
mimax = 65 mjmax = 65 mkmax = 129
imax = 64 jmax = 64 kmax =128
Start rehearsal measurement process.
Measure the performance in 3 times.
MFLOPS: 49.403088 time(s): 1.000000 3.288628e-03
Now, start the actual measurement process.
The loop will be excuted in 180 times
This will take about one minute.
Wait for a while
Loop executed for 180 times
Gosa : 1.752524e-03
MFLOPS measured : 61.753860 cpu : 48.000000
Score based on Pentium III 600MHz : 0.753096
>----------------------------------------------------<
Hmm...
It appears to be correct...
I don't carry out test on old NetBSD version.
>How-To-Repeat:
>Fix:
Sorry
Home |
Main Index |
Thread Index |
Old Index