Subject: Re: speeding up bzero
From: David Laight <david@l8s.co.uk>
List: port-i386
Date: 04/12/2003 14:56:10
On Fri, Apr 11, 2003 at 03:56:07PM -0700, Jason Thorpe wrote:
>
> On Friday, April 11, 2003, at 01:43 PM, David Laight wrote:
>
> > My Athlon 700 gains about 1.5% on 8k aligned calls.
> > For 20 byte aligned transfers the gain is 38%
> > For 20 byte misaligned transfers the gain is 29%
>
> Can you make the same tweaks to memset()? That is what we prefer to
> use these days, after all :-)
I have a nice merged memset/bcopy...
I've also redone memcpy/memmove/bcopy, giving a 30% improvement for
small copies.
Note that this is still a 'rep movsl'-based routine.
Some experiments done last year showed that significant improvements
were possible by using SSE2 instructions and/or avoiding movsl.
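Roughly the shape of loop I mean, sketched here with the gcc SSE2
intrinsics rather than the code that was actually benchmarked (the
function name is mine, and a real version would also want alignment
handling, non-temporal stores and a sensible small-copy cutover):

/*
 * Sketch only: copy 16 bytes per iteration with SSE2 loads/stores,
 * then finish the odd tail a byte at a time.
 */
#include <emmintrin.h>
#include <stddef.h>

void *
sse2_copy(void *dst, const void *src, size_t len)
{
        unsigned char *d = dst;
        const unsigned char *s = src;

        while (len >= 16) {
                __m128i v = _mm_loadu_si128((const __m128i *)s);
                _mm_storeu_si128((__m128i *)d, v);
                s += 16;
                d += 16;
                len -= 16;
        }
        while (len != 0) {              /* trailing bytes */
                *d++ = *s++;
                len--;
        }
        return dst;
}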
As a slight aside, what is the current ruling on whether the kernel
bcopy() has to behave like memcpy() or memmove(), i.e. with respect
to overlapping copies?
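In C terms the distinction I'm asking about is the usual one (plain
standard-C illustration, nothing NetBSD-specific):

#include <stdio.h>
#include <string.h>

int
main(void)
{
        char buf[] = "abcdef";

        /* Overlapping regions: memmove() must give the shifted
         * result, so buf becomes "ababcd". */
        memmove(buf + 2, buf, 4);
        printf("%s\n", buf);

        /* memcpy(buf + 2, buf, 4) with the same arguments is
         * undefined behaviour - the question is which of the two
         * contracts the kernel bcopy() is held to. */
        return 0;
}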
Also, is there a mechanism to automatically update sys/lib/libkern/arch/*
from lib/libc/*/arch/*?
Or could the kernel use a 'reachover' makefile?
David
/* $NetBSD: memcpy.S,v 1.2 1998/01/09 03:45:07 perry Exp $ */
/*-
* Copyright (c) 1990 The Regents of the University of California.
* All rights reserved.
* ...
*/
#include <machine/asm.h>
#if defined(LIBC_SCCS)
RCSID("$NetBSD: bcopy.S,v 1.11 2002/10/29 07:01:44 junyoung Exp $")
#endif
/*
* (ov)bcopy (src,dst,cnt)
* ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
*/
#ifdef BCOPY
ENTRY(bcopy)
#else
#ifdef MEMMOVE
ENTRY(memmove)
#else
#define MEMCPY
#define NO_OVERLAP
ENTRY(memcpy)
#endif
#endif
        push    %esi
        mov     %edi,%edx               /* save %edi; restored before return */
#if defined(MEMCPY) || defined(MEMMOVE)
        movl    8(%esp),%edi            /* dst */
        movl    12(%esp),%esi           /* src */
#else
        movl    8(%esp),%esi            /* bcopy: src is the first arg */
        movl    12(%esp),%edi           /* dst */
#endif
        movl    16(%esp),%ecx           /* byte count */
#if defined(NO_OVERLAP)
        movl    %ecx,%eax
#else
        movl    %edi,%eax
        subl    %esi,%eax
        cmpl    %ecx,%eax               /* overlapping? */
        movl    %ecx,%eax
        jb      backwards               /* dst is inside src..src+cnt */
#endif
        cld                             /* nope, copy forwards. */
        shrl    $2,%ecx                 /* copy by words */
        rep
        movsl
        and     $3,%eax                 /* any bytes left? */
        jnz     trailing
done:
#if defined(MEMCPY) || defined(MEMMOVE)
        movl    8(%esp),%eax            /* return dst */
#endif
        mov     %edx,%edi
        pop     %esi
        ret

trailing:                               /* copy the 1-3 odd bytes */
        cmp     $2,%eax
        jb      1f
        movw    (%esi),%ax
        movw    %ax,(%edi)
        je      done
        movb    2(%esi),%al
        movb    %al,2(%edi)
        jmp     done
1:      movb    (%esi),%al
        movb    %al,(%edi)
        jmp     done

#if !defined(NO_OVERLAP)
backwards:
        addl    %ecx,%edi               /* copy backwards. */
        addl    %ecx,%esi
        and     $3,%eax                 /* any fractional bytes? */
        jnz     back_align
back_aligned:
        shrl    $2,%ecx
        subl    $4,%esi
        subl    $4,%edi
        std
        rep
        movsl
        cld
        jmp     done
back_align:                             /* copy the odd tail bytes first */
        sub     %eax,%esi
        sub     %eax,%edi
        cmp     $2,%eax
        jb      1f
        je      2f
        movb    2(%esi),%al
        movb    %al,2(%edi)
2:      movw    (%esi),%ax
        movw    %ax,(%edi)
        jmp     back_aligned
1:      movb    (%esi),%al
        movb    %al,(%edi)
        jmp     back_aligned
#endif
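For reference, the sort of quick sanity check I'd run against the
routine when it is built as memmove() (just a sketch, not part of the
patch) - it exercises the trailing-byte path and the overlapping
backwards path:

#include <assert.h>
#include <string.h>

int
main(void)
{
        char buf[32];
        size_t n;

        /* odd lengths exercise the 'trailing' code */
        for (n = 1; n <= 8; n++) {
                memset(buf, 'x', sizeof(buf));
                memmove(buf + 16, "abcdefgh", n);
                assert(memcmp(buf + 16, "abcdefgh", n) == 0);
                assert(buf[16 + n] == 'x');     /* no overrun */
        }

        /* overlapping, dst > src: must take the backwards path */
        memcpy(buf, "0123456789", 10);
        memmove(buf + 3, buf, 7);
        assert(memcmp(buf + 3, "0123456", 7) == 0);
        return 0;
}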
--
David Laight: david@l8s.co.uk