Port-amd64 archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[patch] assembly clean-up: xor
Browsing through the amd64 assembly code, I found that it could use a
little bit of clean-up. E.g., many functions use 64-bit instructions,
even in cases where they are not necessary.
I prepared a patch set, which
* uses shorter instruction encodings, if it does not affect speed
* prefers instructions with lower latency
* unifies some constructs between functions
Altogether, it's not too intrusive, i.e. it's more an adjustment, not a
rewrite.
I attached the first patch, which is the simplest one (it's also the one
which makes the most noise - with it applied, I think it's easier to
review the following ones):
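All XOR operations for clearing a register are converted to 32-bit. This
is sufficient because writing a 32-bit register automatically zeroes the
upper 32 bits of the full 64-bit register. It also saves one byte (the
REX.W prefix) in the case of the first eight GPRs; %r8-%r15 still need a
REX prefix to be addressed, so their encoding size is unchanged.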
You can see that compilers also prefer the 32-bit variant. As an
example, none of the 64-bit ones is left in the kernel when this patch
is applied:
~$ objdump -d /netbsd.old | \
grep -Ec '[[:blank:]]xor[[:blank:]]+%r([a-s][ipx]|[89]|1[0-5]),%r\1'
41
~$ objdump -d /netbsd | \
grep -Ec '[[:blank:]]xor[[:blank:]]+%r([a-s][ipx]|[89]|1[0-5]),%r\1'
0
(and the .text section of the kernel shrinks by 64 bytes)
Best regards,
Jo.
Index: common/lib/libc/arch/x86_64/string/memchr.S
===================================================================
RCS file: /cvsroot/src/common/lib/libc/arch/x86_64/string/memchr.S,v
retrieving revision 1.6
diff -u -r1.6 memchr.S
--- common/lib/libc/arch/x86_64/string/memchr.S 22 Mar 2014 19:16:34 -0000 1.6
+++ common/lib/libc/arch/x86_64/string/memchr.S 4 May 2015 15:38:55 -0000
@@ -104,6 +104,6 @@
jmp 2b
/* Not found */
-30: xorq %rax,%rax
+30: xorl %eax,%eax
ret
END(memchr)
Index: common/lib/libc/arch/x86_64/string/strchr.S
===================================================================
RCS file: /cvsroot/src/common/lib/libc/arch/x86_64/string/strchr.S,v
retrieving revision 1.7
diff -u -r1.7 strchr.S
--- common/lib/libc/arch/x86_64/string/strchr.S 22 Mar 2014 19:16:34 -0000 1.7
+++ common/lib/libc/arch/x86_64/string/strchr.S 4 May 2015 15:38:55 -0000
@@ -111,7 +111,7 @@
/* Source misaligned: read aligned word and make low bytes invalid */
/* I (dsl) think a _ALIGN_TEXT here will slow things down! */
20:
- xor %rcx,%rcx
+ xor %ecx,%ecx
sub %dil,%cl /* Convert low address values 1..7 ... */
sbb %rsi,%rsi /* carry was set, so %rsi now ~0u! */
and $7,%cl /* ... to 7..1 */
Index: common/lib/libc/arch/x86_64/string/strrchr.S
===================================================================
RCS file: /cvsroot/src/common/lib/libc/arch/x86_64/string/strrchr.S,v
retrieving revision 1.3
diff -u -r1.3 strrchr.S
--- common/lib/libc/arch/x86_64/string/strrchr.S 22 Mar 2014 19:16:34 -0000 1.3
+++ common/lib/libc/arch/x86_64/string/strrchr.S 4 May 2015 15:38:55 -0000
@@ -13,7 +13,7 @@
movzbq %sil,%rcx
/* zero return value */
- xorq %rax,%rax
+ xorl %eax,%eax
/*
* Align to word boundary.
Index: lib/libc/arch/x86_64/gen/__setjmp14.S
===================================================================
RCS file: /cvsroot/src/lib/libc/arch/x86_64/gen/__setjmp14.S,v
retrieving revision 1.3
diff -u -r1.3 __setjmp14.S
--- lib/libc/arch/x86_64/gen/__setjmp14.S 22 May 2014 15:01:56 -0000 1.3
+++ lib/libc/arch/x86_64/gen/__setjmp14.S 4 May 2015 15:39:37 -0000
@@ -66,7 +66,7 @@
leaq (_JB_SIGMASK * 8)(%rdi),%rdx
xorl %edi,%edi
- xorq %rsi,%rsi
+ xorl %esi,%esi
#ifdef __PIC__
call PIC_PLT(_C_LABEL(__sigprocmask14))
@@ -83,7 +83,7 @@
leaq (_JB_SIGMASK * 8)(%rdi),%rsi
movl $3,%edi /* SIG_SETMASK */
- xorq %rdx,%rdx
+ xorl %edx,%edx
pushq %r8
#ifdef __PIC__
Index: lib/libc/arch/x86_64/gen/__sigsetjmp14.S
===================================================================
RCS file: /cvsroot/src/lib/libc/arch/x86_64/gen/__sigsetjmp14.S,v
retrieving revision 1.3
diff -u -r1.3 __sigsetjmp14.S
--- lib/libc/arch/x86_64/gen/__sigsetjmp14.S 22 May 2014 15:01:56 -0000 1.3
+++ lib/libc/arch/x86_64/gen/__sigsetjmp14.S 4 May 2015 15:39:37 -0000
@@ -70,7 +70,7 @@
leaq (_JB_SIGMASK * 8)(%rdi),%rdx
xorl %edi,%edi
- xorq %rsi,%rsi
+ xorl %esi,%esi
#ifdef __PIC__
call PIC_PLT(_C_LABEL(__sigprocmask14))
@@ -89,7 +89,7 @@
jz 2f
leaq (_JB_SIGMASK * 8)(%rdi),%rsi
movl $3,%edi /* SIG_SETMASK */
- xorq %rdx,%rdx
+ xorl %edx,%edx
#ifdef __PIC__
call PIC_PLT(_C_LABEL(__sigprocmask14))
Index: sys/arch/amd64/acpi/acpi_wakeup_low.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/acpi/acpi_wakeup_low.S,v
retrieving revision 1.4
diff -u -r1.4 acpi_wakeup_low.S
--- sys/arch/amd64/acpi/acpi_wakeup_low.S 11 May 2008 15:32:20 -0000 1.4
+++ sys/arch/amd64/acpi/acpi_wakeup_low.S 4 May 2015 15:39:39 -0000
@@ -99,7 +99,7 @@
movq ACPI_SUSPEND_REG+(5*8)(%r8),%r14
movq ACPI_SUSPEND_REG+(6*8)(%r8),%r15
- xorq %rax,%rax
+ xorl %eax,%eax
pushq ACPI_SUSPEND_REG+(7*8)(%r8)
popfq
Index: sys/arch/amd64/amd64/amd64_trap.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/amd64_trap.S,v
retrieving revision 1.2
diff -u -r1.2 amd64_trap.S
--- sys/arch/amd64/amd64/amd64_trap.S 12 Feb 2014 19:53:49 -0000 1.2
+++ sys/arch/amd64/amd64/amd64_trap.S 4 May 2015 15:39:39 -0000
@@ -415,7 +415,7 @@
movabsq $4f,%rdi
movl CPUVAR(ILEVEL),%esi
movl %ebx,%edx
- xorq %rax,%rax
+ xorl %eax,%eax
call _C_LABEL(printf)
movl %ebx,%edi
call _C_LABEL(spllower)
Index: sys/arch/amd64/amd64/copy.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/copy.S,v
retrieving revision 1.18
diff -u -r1.18 copy.S
--- sys/arch/amd64/amd64/copy.S 7 Jul 2010 01:13:29 -0000 1.18
+++ sys/arch/amd64/amd64/copy.S 4 May 2015 15:39:39 -0000
@@ -93,7 +93,7 @@
jnz 2f
cmpl $0, L_DOPREEMPT(%rbx)
jz 2f
- xorq %rdi, %rdi
+ xorl %edi, %edi
call _C_LABEL(kpreempt)
2:
cmpl $0, CPUVAR(WANT_PMAPLOAD)
@@ -143,7 +143,7 @@
rep
movsb
- xorq %rax,%rax
+ xorl %eax,%eax
ret
# Using 'rep movs' to copy backwards is not as fast as for forwards copies
@@ -167,7 +167,7 @@
movsq
cld
.Lkcopy_end:
- xorq %rax,%rax
+ xorl %eax,%eax
ret
ENTRY(copyout)
@@ -266,7 +266,7 @@
.Lcopyoutstr_end:
/* Success -- 0 byte reached. */
decq %rdx
- xorq %rax,%rax
+ xorl %eax,%eax
jmp copystr_return
2: /* rdx is zero -- return EFAULT or ENAMETOOLONG. */
@@ -306,7 +306,7 @@
/* Success -- 0 byte reached. */
decq %rdx
- xorq %rax,%rax
+ xorl %eax,%eax
jmp copystr_return
2: /* edx is zero -- return EFAULT or ENAMETOOLONG. */
@@ -422,7 +422,7 @@
movq %r11,PCB_ONFAULT(%rcx)
movq %rsi,(%rdi)
- xorq %rax,%rax
+ xorl %eax,%eax
movq %rax,PCB_ONFAULT(%rcx)
ret
DEFERRED_SWITCH_CALL
@@ -438,7 +438,7 @@
movq %r11,PCB_ONFAULT(%rcx)
movw %si,(%rdi)
- xorq %rax,%rax
+ xorl %eax,%eax
movq %rax,PCB_ONFAULT(%rcx)
ret
DEFERRED_SWITCH_CALL
@@ -453,7 +453,7 @@
leaq _C_LABEL(fusuintrfailure)(%rip),%r11
movq %r11,PCB_ONFAULT(%rcx)
movw %si,(%rdi)
- xorq %rax,%rax
+ xorl %eax,%eax
movq %rax,PCB_ONFAULT(%rcx)
ret
@@ -468,7 +468,7 @@
movq %r11,PCB_ONFAULT(%rcx)
movb %sil,(%rdi)
- xorq %rax,%rax
+ xorl %eax,%eax
movq %rax,PCB_ONFAULT(%rcx)
ret
DEFERRED_SWITCH_CALL
@@ -513,7 +513,7 @@
* Set the return values.
*/
movq %rax, (%rcx)
- xorq %rax, %rax
+ xorl %eax, %eax
ret
DEFERRED_SWITCH_CALL
@@ -537,7 +537,7 @@
* Set the return values.
*/
movl %eax, (%rcx)
- xorq %rax, %rax
+ xorl %eax, %eax
ret
DEFERRED_SWITCH_CALL
Index: sys/arch/amd64/amd64/cpu_in_cksum.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/cpu_in_cksum.S,v
retrieving revision 1.2
diff -u -r1.2 cpu_in_cksum.S
--- sys/arch/amd64/amd64/cpu_in_cksum.S 22 Jun 2013 05:56:32 -0000 1.2
+++ sys/arch/amd64/amd64/cpu_in_cksum.S 4 May 2015 15:39:39 -0000
@@ -100,8 +100,8 @@
cmovb %esi, %ebp
subl %ebp, %esi
- xorq %r9, %r9
- xorq %r10, %r10
+ xorl %r9d, %r9d
+ xorl %r10d, %r10d
.Mmbuf_align_word:
/* Already aligned on a word boundary? */
@@ -179,7 +179,7 @@
addq %r9, %r10
movq %r10, %rax
shrq $62, %rax
- xorq %r9, %r9
+ xorl %r9d, %r9d
testb %al, %al
jz .Mmbuf_inner_loop
@@ -189,7 +189,7 @@
1:
addq %r10, %r8
adcq $0, %r8
- xorq %r10, %r10
+ xorl %r10d, %r10d
jmp .Mmbuf_inner_loop
Index: sys/arch/amd64/amd64/cpufunc.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/cpufunc.S,v
retrieving revision 1.25
diff -u -r1.25 cpufunc.S
--- sys/arch/amd64/amd64/cpufunc.S 12 Feb 2014 23:24:09 -0000 1.25
+++ sys/arch/amd64/amd64/cpufunc.S 4 May 2015 15:39:39 -0000
@@ -191,7 +191,7 @@
ENTRY(rdmsr)
movq %rdi, %rcx
- xorq %rax, %rax
+ xorl %eax, %eax
rdmsr
shlq $32, %rdx
orq %rdx, %rax
@@ -207,7 +207,7 @@
ENTRY(rdmsr_locked)
movq %rdi, %rcx
- xorq %rax, %rax
+ xorl %eax, %eax
movl $OPTERON_MSR_PASSCODE, %edi
rdmsr
shlq $32, %rdx
@@ -239,7 +239,7 @@
movl %eax, %eax /* zero-extend %eax -> %rax */
orq %rdx, %rax
movq %rax, (%rsi) /* *data */
- xorq %rax, %rax /* "no error" */
+ xorl %eax, %eax /* "no error" */
movq %rax, PCB_ONFAULT(%r8)
ret
@@ -276,7 +276,7 @@
#endif
ENTRY(cpu_counter)
- xorq %rax, %rax
+ xorl %eax, %eax
rdtsc
shlq $32, %rdx
orq %rdx, %rax
@@ -290,7 +290,7 @@
ENTRY(rdpmc)
movq %rdi, %rcx
- xorq %rax, %rax
+ xorl %eax, %eax
rdpmc
shlq $32, %rdx
orq %rdx, %rax
@@ -518,7 +518,7 @@
ENTRY(inb)
movq %rdi, %rdx
- xorq %rax, %rax
+ xorl %eax, %eax
inb %dx, %al
ret
@@ -532,7 +532,7 @@
ENTRY(inw)
movq %rdi, %rdx
- xorq %rax, %rax
+ xorl %eax, %eax
inw %dx, %ax
ret
@@ -546,7 +546,7 @@
ENTRY(inl)
movq %rdi, %rdx
- xorq %rax, %rax
+ xorl %eax, %eax
inl %dx, %eax
ret
Index: sys/arch/amd64/amd64/lock_stubs.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/lock_stubs.S,v
retrieving revision 1.25
diff -u -r1.25 lock_stubs.S
--- sys/arch/amd64/amd64/lock_stubs.S 22 Jun 2013 06:23:28 -0000 1.25
+++ sys/arch/amd64/amd64/lock_stubs.S 4 May 2015 15:39:39 -0000
@@ -61,7 +61,7 @@
ENTRY(mutex_enter)
movq CPUVAR(CURLWP), %rcx
- xorq %rax, %rax
+ xorl %eax, %eax
LOCK(1)
cmpxchgq %rcx, (%rdi)
jnz 1f
@@ -81,7 +81,7 @@
*/
ENTRY(mutex_exit)
movq CPUVAR(CURLWP), %rax
- xorq %rdx, %rdx
+ xorl %edx, %edx
cmpxchgq %rdx, (%rdi)
jnz 1f
ret
@@ -200,7 +200,7 @@
* Writer: if the compare-and-set fails, don't bother retrying.
*/
2: movq CPUVAR(CURLWP), %rcx
- xorq %rax, %rax
+ xorl %eax, %eax
orq $RW_WRITE_LOCKED, %rcx
LOCK(3)
cmpxchgq %rcx, (%rdi)
@@ -278,7 +278,7 @@
* Writer: if the compare-and-set fails, don't bother retrying.
*/
2: movq CPUVAR(CURLWP), %rcx
- xorq %rax, %rax
+ xorl %eax, %eax
orq $RW_WRITE_LOCKED, %rcx
LOCK(9)
cmpxchgq %rcx, (%rdi)
Index: sys/arch/amd64/amd64/locore.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.77
diff -u -r1.77 locore.S
--- sys/arch/amd64/amd64/locore.S 17 Aug 2014 21:17:43 -0000 1.77
+++ sys/arch/amd64/amd64/locore.S 4 May 2015 15:39:39 -0000
@@ -762,7 +762,7 @@
movq %rax,(_C_LABEL(lwp0)+L_PCB)(%rip) /* XXX L_PCB != uarea */
leaq (USPACE-FRAMESIZE)(%rax),%rsp
movq %rsi,PCB_CR3(%rax) # pcb->pcb_cr3
- xorq %rbp,%rbp # mark end of frames
+ xorl %ebp,%ebp # mark end of frames
xorw %ax,%ax
movw %ax,%gs
@@ -787,7 +787,7 @@
movq %rsi, %rbx
/* Clear BSS. */
- xorq %rax,%rax
+ xorl %eax,%eax
movq $_C_LABEL(__bss_start),%rdi
movq $_C_LABEL(_end),%rcx
subq %rdi,%rcx
@@ -849,7 +849,7 @@
*/
leaq (USPACE-FRAMESIZE)(%rsi),%rsp
- xorq %rbp,%rbp
+ xorl %ebp,%ebp
xorw %ax,%ax
movw %ax,%gs
@@ -1043,7 +1043,7 @@
jne 32f
/* Zero out %fs/%gs registers and GDT descriptors. */
- xorq %rax, %rax
+ xorl %eax, %eax
movw %ax, %fs
CLI(cx)
SWAPGS
@@ -1245,7 +1245,7 @@
movl TF_RDI(%rsp),%edx
movl %ebx,%ecx
movl CPUVAR(ILEVEL),%r8d
- xorq %rax,%rax
+ xorl %eax,%eax
call _C_LABEL(printf)
movl $IPL_NONE,%edi
call _C_LABEL(spllower)
@@ -1279,7 +1279,7 @@
movq %rbp,%rsi
movq %rbp,%r14 /* for .Lsyscall_checkast */
movq %rax,%rdi
- xorq %rbp,%rbp
+ xorl %ebp,%ebp
call _C_LABEL(lwp_startup)
movq %r13,%rdi
call *%r12
@@ -1337,7 +1337,7 @@
pushq %rbp
movq %rsp,%rbp
movl $(PAGE_SIZE/64), %ecx
- xorq %rax, %rax
+ xorl %eax, %eax
.align 16
1:
testl $RESCHED_KPREEMPT, CPUVAR(RESCHED)
@@ -1372,7 +1372,7 @@
ENTRY(pagezero)
movq $-PAGE_SIZE,%rdx
subq %rdx,%rdi
- xorq %rax,%rax
+ xorl %eax,%eax
1:
movnti %rax,(%rdi,%rdx)
movnti %rax,8(%rdi,%rdx)
Index: sys/arch/amd64/amd64/spl.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/spl.S,v
retrieving revision 1.29
diff -u -r1.29 spl.S
--- sys/arch/amd64/amd64/spl.S 27 Nov 2014 04:48:39 -0000 1.29
+++ sys/arch/amd64/amd64/spl.S 4 May 2015 15:39:39 -0000
@@ -152,7 +152,7 @@
IDTVEC(preemptrecurse)
movl $IPL_PREEMPT, CPUVAR(ILEVEL)
sti
- xorq %rdi, %rdi
+ xorl %edi, %edi
call _C_LABEL(kpreempt)
cli
jmp *%r13 /* back to Xspllower */
Index: sys/arch/amd64/amd64/vector.S
===================================================================
RCS file: /cvsroot/src/sys/arch/amd64/amd64/vector.S,v
retrieving revision 1.44
diff -u -r1.44 vector.S
--- sys/arch/amd64/amd64/vector.S 25 Jun 2013 00:27:22 -0000 1.44
+++ sys/arch/amd64/amd64/vector.S 4 May 2015 15:39:39 -0000
@@ -193,7 +193,7 @@
sti
pushq %rbx
movq %rsp,%rsi
- xorq %rdi,%rdi
+ xorl %edi,%edi
call _C_LABEL(lapic_clockintr)
jmp _C_LABEL(Xdoreti)
2:
Index: sys/lib/libkern/arch/x86_64/scanc.S
===================================================================
RCS file: /cvsroot/src/sys/lib/libkern/arch/x86_64/scanc.S,v
retrieving revision 1.2
diff -u -r1.2 scanc.S
--- sys/lib/libkern/arch/x86_64/scanc.S 28 Apr 2008 20:24:06 -0000 1.2
+++ sys/lib/libkern/arch/x86_64/scanc.S 4 May 2015 15:39:43 -0000
@@ -42,7 +42,7 @@
testl %ecx,%ecx
jz 2f
movq %r11,%rdi
- xorq %rax,%rax
+ xorl %eax,%eax
cld
1:
lodsb
Home |
Main Index |
Thread Index |
Old Index