port-arm32: ARM710 bug in UMULL

Subject: ARM710 bug in UMULL
To: None <port-arm32@netbsd.org>
From: None <kim@pvv.ntnu.no>
List: port-arm32
Date: 11/01/2001 12:02:19
The ARM7 is said to be able to perform 32-bit * 32-bit = 64-bit
multiplications with the UMULL instruction, for example:
        umull   r2,r4, r1, r3

However, this does not work on my RiscPC700 running NetBSD. The processor is
VLSI/ARM 
9226 B557125
VY86C710A
ARM710a
ARM Ltd

Are there known bugs with the ARM710a?

I have tested thoroughly, hand-assembling and disassembling individual
bits of the instruction to rule out assembler bugs and the like. The strange
thing is that I apparently got it to work once. Perhaps it is sensitive to
the particular instance of the instruction, the registers, or the arguments.

I am currently using this processor to develop Bluetooth and crypto
applications, in which the UMULL instruction is very important for
efficiency. NetBSD is really good for such development work. That's
why I prefer it.

Below is the program I use to test this. It should return 21, because
that's 7*3, but it usually returns 2, or about 4 000 000 000. It is
assembled correctly by the GNU assembler (as), e.g. with: gcc -o x x.s

Kim0


@ Register aliases (GNU as `.req`) for the names used by the code below.
@ rfp and sl are defined here but not referenced in the visible code.
rfp	.req	r9
sl	.req	r10
fp	.req	r11		@ used as the frame base in the prologues/epilogues below
ip	.req	r12		@ holds the entry value of sp during each prologue
sp	.req	r13		@ stack pointer
lr	.req	r14		@ pushed by the prologue and reloaded into pc on return
pc	.req	r15		@ program counter
@ Marker labels; presumably emitted by the compiler that generated this
@ file (not referenced anywhere in the visible code).
gcc2_compiled.:
___gnu_compiled_c:
@ Everything that follows is assembled into the text section.
.text
	.align	0
	.global	_gurgle
	.type	 _gurgle,#function
_gurgle:
	@ args = 0, pretend = 0, frame = 8
	@ frame_needed = 1, current_function_anonymous_args = 0
	mov	ip, sp
	stmfd	sp!, {r4, fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #8
	str	r0, [fp, #-20]
	str	r1, [fp, #-24]
	ldr	r3, [fp, #-20]
	ldr	r1, [fp, #-24]


	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0
	mov	r0,r0		@ nop, 0xE1A00000 eller 0x a0e1 0000
	@ 1110 ignore flags, -00 data processing, 0 register2, 1-101 mov,
	@ 0 not alter kondition codes, -0000 r0, -0000 r0, -0000-0000-0000

	mov	r1, #7
	mov	r3, #3
		
	umull	r2,r4, r1, r3
	@ 0x 9123 84e0 eller 0xe0842391
	@ 1110:e ignore flags, 0000:0-1 mull, 0 unsigned, 0 no accumulate,
	@ 0:8 unsigned, rdhi=0100:4, rdlo=0010:2, Rs=0011:3, 1001:9, Rm=0001:1
 	
	mov	r3, r2
	mov	r4, #0
	mov	r1, r4
	mov	r0, r3
	b	L1
L1:
	ldmea	fp, {r4, fp, sp, pc}
Lfe1:
	.size	 _gurgle,Lfe1-_gurgle
	.align	0
LC0:
	@ Format string "%d\n" followed by a NUL byte; _main loads its address
	@ (via the literal word at L4) into r0 before calling _printf.
	.ascii	"%d\012\000"
	.align	0
	.global	_main
	.type	 _main,#function
_main:
	@ Test driver: calls _gurgle(3, 13) and prints the result with
	@ _printf("%d\n", r0_result, r1_result). The format string consumes
	@ only the first value, i.e. the low word returned in r0.
	@ Out:  r0 = 0, so the process exit status no longer depends on
	@       whatever _printf happened to return.
	@ Uses: r0-r3, ip; r4 is saved in the prologue and restored on return.
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 1, current_function_anonymous_args = 0
	mov	ip, sp				@ ip = sp on entry
	stmfd	sp!, {r4, fp, ip, lr, pc}	@ push r4, fp, entry sp, lr, pc
	sub	fp, ip, #4			@ fp = entry sp - 4
	bl	___main				@ runtime start-up hook (defined elsewhere)
	mov	r0, #3				@ first argument to _gurgle
	mov	r1, #13				@ second argument to _gurgle
	bl	_gurgle
	mov	r4, r1				@ keep the second result word
	mov	r3, r0				@ keep the first result word
	ldr	r0, L4				@ r0 = address of the format string LC0
	mov	r2, r4				@ third printf argument = second result word
	mov	r1, r3				@ second printf argument = first result word
	bl	_printf
	b	L3				@ skip over the literal word below
L5:
	.align	0
L4:
	.word	LC0				@ literal: address of the format string
L3:
L2:
	mov	r0, #0				@ return 0 (success) instead of printf's return value
	ldmea	fp, {r4, fp, sp, pc}		@ restore r4, fp, sp; return by loading the saved lr into pc
Lfe2:
	.size	 _main,Lfe2-_main