Hi,
evbarm's gcc (gcc version 4.5.4 (NetBSD nb1 20120916)) misoptimizes 64-bit
shift operations at -O2, and I've found that this bug has already been fixed
upstream by http://gcc.gnu.org/ml/gcc-patches/2010-09/msg01070.html.
I tried to reduce my test case; however, I don't have much time, so the
resulting program is by no means small, but I did succeed in reducing the
optimization flags to a minimum (see attached mp3.c).
command "arm--netbsdelf-gcc -O1 -S mp3.c" will compile the 64-bit right shift
by 1-bit (in line 101 and 103 of mp3.c) just fine:
.L9:
	movs	r1, r3, lsr #1
	mov	r0, r2, rrx
Here (r3, r2) holds the 64-bit source; it is shifted right by 1 bit and
saved to (r1, r0).
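(For reference, this movs/rrx pair is the standard ARM idiom for a 64-bit
logical shift right by one: movs shifts the high word and leaves the
shifted-out bit in the carry flag, and rrx then rotates the low word right
through the carry. A minimal function like the following sketch -- shr1 is
just a hypothetical name, not taken from mp3.c -- should go through the same
arm_lshrdi3_1bit pattern:

typedef unsigned long long uvlong;

/* 64-bit logical shift right by one; on 32-bit ARM, gcc
 * expands this to the movs/rrx pair shown above */
uvlong
shr1(uvlong x)
{
	return x >> 1;
}

A function this small does not necessarily reproduce the bad register
allocation, though, hence the larger test case.)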
However, if I add these optimization flags to the command line:

	-fcaller-saves -fcse-follow-jumps -fregmove -fschedule-insns

the assembly for the second right shift becomes:
.L9:
	movs	r1, r2, lsr #1
	mov	r0, r1, rrx
which is obviously wrong: the first lsr clobbers r1 before the rrx reads it.
adding "-dp" to the command shows that the faulting machine description pattern
is arm_lshrdi3_1bit.
.L9:
	movs	r1, r2, lsr #1	@ 174 arm_lshrdi3_1bit/2 [length = 8]
	mov	r0, r1, rrx
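So, combining the flags above, the full command line that reproduces the bad
code should be:

	arm--netbsdelf-gcc -O1 -fcaller-saves -fcse-follow-jumps \
		-fregmove -fschedule-insns -S -dp mp3.c

and the bad sequence can then be located in mp3.s by searching for
arm_lshrdi3_1bit.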
After applying the patch (also attached), the problem is resolved.
Please consider importing this simple (and trivial) patch.
Thank you.
Cheers,
minux
Attachment: gcc.patch

mp3.c:
int nsavederrors, nerrors;

typedef unsigned char uchar;
typedef unsigned long long uvlong;

// from gc/go.h
enum
{
	Mpscale	= 29,		// safely smaller than bits in a long
	Mpprec	= 16,		// Mpscale*Mpprec is max number of bits
	Mpnorm	= Mpprec - 1,	// significant words in a normalized float
	Mpbase	= 1L << Mpscale,
	Mpsign	= Mpbase >> 1,
	Mpmask	= Mpbase - 1,
	Mpdebug	= 0,
};
typedef struct Mpint Mpint;
struct Mpint
{
	long	a[Mpprec];
	uchar	neg;
	uchar	ovf;
};

typedef struct Mpflt Mpflt;
struct Mpflt
{
	Mpint	val;
	short	exp;
};
double ldexp(double x, int n);
int sigfig(Mpflt *a);
void yyerror(const char *);
void mpnorm(Mpflt *a);
void mpshiftfix(Mpint *, int);
double
mpgetflt(Mpflt *a)
{
	int s, i, e;
	uvlong v, vm;
	double f;

	if(a->val.ovf && nsavederrors+nerrors == 0)
		yyerror("mpgetflt ovf");

	s = sigfig(a);
	if(s == 0)
		return 0;

	if(s != Mpnorm) {
		yyerror("mpgetflt norm");
		mpnorm(a);
	}

	while((a->val.a[Mpnorm-1] & Mpsign) == 0) {
		mpshiftfix(&a->val, 1);
		a->exp -= 1;
	}

	// the magic numbers (64, 63, 53, 10, -1074) are
	// IEEE specific. this should be done machine
	// independently or in the 6g half of the compiler

	// pick up the mantissa and a rounding bit in a uvlong
	s = 53+1;
	v = 0;
	for(i=Mpnorm-1; s>=Mpscale; i--) {
		v = (v<<Mpscale) | a->val.a[i];
		s -= Mpscale;
	}
	vm = v;
	if(s > 0)
		vm = (vm<<s) | (a->val.a[i]>>(Mpscale-s));

	// continue with 64 more bits
	s += 64;
	for(; s>=Mpscale; i--) {
		v = (v<<Mpscale) | a->val.a[i];
		s -= Mpscale;
	}
	if(s > 0)
		v = (v<<s) | (a->val.a[i]>>(Mpscale-s));

	// gradual underflow
	e = Mpnorm*Mpscale + a->exp - 53;
	if(e < -1074) {
		s = -e - 1074;
		if(s > 54)
			s = 54;
		v |= vm & ((1ULL<<s) - 1);
		vm >>= s;
		e = -1074;
	}

	//print("vm=%.16llux v=%.16llux\n", vm, v);
	// round toward even
	if(v != 0 || (vm&2ULL) != 0)
		vm = (vm>>1) + (vm&1ULL);
	else
		vm >>= 1;

	f = (double)(vm);
	f = ldexp(f, e);
	if(a->val.neg)
		f = -f;
	return f;
}