Hi,

evbarm's gcc (gcc version 4.5.4 (NetBSD nb1 20120916)) misoptimizes a 64-bit shift operation at -O2, and I've found that this bug has already been fixed upstream by http://gcc.gnu.org/ml/gcc-patches/2010-09/msg01070.html.

I tried to reduce my test case; however, I don't have much time, so the resulting program is by no means small, but I did succeed in reducing the optimization flags to a minimum (see attached mp3.c).

The command "arm--netbsdelf-gcc -O1 -S mp3.c" compiles the 64-bit right shifts by 1 bit (lines 101 and 103 of mp3.c) just fine:

.L9:
	movs	r1, r3, lsr #1
	mov	r0, r2, rrx

(r3, r2) contains the 64-bit source; it is shifted right by 1 bit and the result is saved to (r1, r0).

However, if I add these optimization flags to the command line:

	-fcaller-saves -fcse-follow-jumps -fregmove -fschedule-insns

the assembly for the 2nd right shift becomes:

.L9:
	movs	r1, r2, lsr #1
	mov	r0, r1, rrx

which is obviously wrong (the first lsr already clobbers r1).

Adding "-dp" to the command line shows that the faulting machine description pattern is arm_lshrdi3_1bit:

.L9:
	movs	r1, r2, lsr #1	@ 174	arm_lshrdi3_1bit/2	[length = 8]
	mov	r0, r1, rrx

After applying the patch (also attached), this problem is resolved. Please consider importing this simple (and trivial) patch. Thank you.

Cheers,
minux
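For reference, the operation being miscompiled is just a DImode logical right shift by one. A minimal sketch of it in isolation (the file and function names below are mine, not part of mp3.c, and this stripped-down form may not trigger the bad register allocation on its own):

	/* shr1.c -- the operation behind arm_lshrdi3_1bit.
	   On ARM, gcc lowers a 64-bit >>1 to a two-instruction pair:
	       movs  rHIdst, rHI, lsr #1   @ shift high word; bit 0 goes to carry
	       mov   rLOdst, rLO, rrx      @ rotate low word right through carry
	   The destination of the movs must not overlap the source of the
	   mov, which is exactly what the miscompiled sequence violates. */
	unsigned long long
	shr1(unsigned long long x)
	{
		return x >> 1;
	}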
Attachment:
gcc.patch
Description: Binary data
int nsavederrors, nerrors;

typedef unsigned char uchar;
typedef unsigned long long uvlong;

// from gc/go.h
enum
{
	Mpscale	= 29,		// safely smaller than bits in a long
	Mpprec	= 16,		// Mpscale*Mpprec is max number of bits
	Mpnorm	= Mpprec - 1,	// significant words in a normalized float
	Mpbase	= 1L << Mpscale,
	Mpsign	= Mpbase >> 1,
	Mpmask	= Mpbase - 1,
	Mpdebug	= 0,
};

typedef struct Mpint Mpint;
struct Mpint
{
	long	a[Mpprec];
	uchar	neg;
	uchar	ovf;
};

typedef struct Mpflt Mpflt;
struct Mpflt
{
	Mpint	val;
	short	exp;
};

double ldexp(double x, int n);
int sigfig(Mpflt *a);
void yyerror(const char *);
void mpnorm(Mpflt *a);
void mpshiftfix(Mpint *, int);

double
mpgetflt(Mpflt *a)
{
	int s, i, e;
	uvlong v, vm;
	double f;

	if(a->val.ovf && nsavederrors+nerrors == 0)
		yyerror("mpgetflt ovf");

	s = sigfig(a);
	if(s == 0)
		return 0;

	if(s != Mpnorm) {
		yyerror("mpgetflt norm");
		mpnorm(a);
	}

	while((a->val.a[Mpnorm-1] & Mpsign) == 0) {
		mpshiftfix(&a->val, 1);
		a->exp -= 1;
	}

	// the magic numbers (64, 63, 53, 10, -1074) are
	// IEEE specific. this should be done machine
	// independently or in the 6g half of the compiler

	// pick up the mantissa and a rounding bit in a uvlong
	s = 53+1;
	v = 0;
	for(i=Mpnorm-1; s>=Mpscale; i--) {
		v = (v<<Mpscale) | a->val.a[i];
		s -= Mpscale;
	}
	vm = v;
	if(s > 0)
		vm = (vm<<s) | (a->val.a[i]>>(Mpscale-s));

	// continue with 64 more bits
	s += 64;
	for(; s>=Mpscale; i--) {
		v = (v<<Mpscale) | a->val.a[i];
		s -= Mpscale;
	}
	if(s > 0)
		v = (v<<s) | (a->val.a[i]>>(Mpscale-s));

	// gradual underflow
	e = Mpnorm*Mpscale + a->exp - 53;
	if(e < -1074) {
		s = -e - 1074;
		if(s > 54)
			s = 54;
		v |= vm & ((1ULL<<s) - 1);
		vm >>= s;
		e = -1074;
	}

	//print("vm=%.16llux v=%.16llux\n", vm, v);
	// round toward even
	if(v != 0 || (vm&2ULL) != 0)
		vm = (vm>>1) + (vm&1ULL);
	else
		vm >>= 1;

	f = (double)(vm);
	f = ldexp(f, e);
	if(a->val.neg)
		f = -f;
	return f;
}
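For completeness, here is a self-contained run-time check, separate from mp3.c (the file name, constants and messages are made up for illustration; whether this minimal form reproduces the bug depends on the surrounding code and flags, so mp3.c remains the real test case):

	/* shrtest.c -- verify a 64-bit >>1 against a precomputed result.
	   If the movs/rrx pair is emitted with overlapping registers,
	   the low word of the result is corrupted and the check fails. */
	#include <stdio.h>

	/* volatile keeps the compiler from folding the shift at compile time */
	volatile unsigned long long in = 0xDEADBEEFCAFEF00DULL;

	int
	main(void)
	{
		unsigned long long got = in >> 1;
		unsigned long long want = 0x6F56DF77E57F7806ULL;

		if (got != want) {
			printf("miscompiled: got %016llx, want %016llx\n", got, want);
			return 1;
		}
		printf("ok\n");
		return 0;
	}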