Source-Changes-D archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: CVS commit: src/common/lib/libc/arch/x86_64/string



Sorry for answering out of thread, but the message is long gone.
I just remembered very old x86 code of mine.

More than fifteen years ago it was still faster to go for 32-bit
wide testing when more than 20 bytes had to be compared, and to
redo a short byte loop to work around the fact that the carry flag
reflects the wrong byte order for the mismatching slot.
Maybe of interest.

 |Andrew Doran <ad%netbsd.org@localhost> writes:
 |> Hi,
 |>
 |> Change backed out.  Sorry about the disruption.

/// MEMCMP - sir (*)(const void *_ba, const void *_bb, uir _bytes)
//
// Compare _bytes bytes at _ba and _bb as unsigned bytes (i386, AT&T syntax).
// In:    arguments on the stack, fetched via __PICSO(12/16/20)(%esp) after
//        %edi and %esi have been pushed.
// Out:   %eax = 0 if all bytes are equal or _bytes is 0, -1 if the first
//        differing byte of _ba is the smaller one, 1 otherwise.
// Clobb: %eax, %ecx, %edx, flags; DF is cleared; %esi/%edi are restored.
// NOTE(review): GET_GOT/UNGET_GOT, NYDIN/NYDOUT, ASSERT_CRASH, PICSO and
// __PICSO are defined outside this excerpt; what they push or clobber
// cannot be confirmed from here.
#undef FUN
#undef FUN_STR
#define FUN		__XXXXXX_mem_Compare
#define FUN_STR	"sir XXXXXXX::Mem::Utils::Compare(const void*,const void*,uir)"
ASSERT_FUNVARS_STR()
NYD_FUNVARS_STR()
.global	G(FUN)
.type	G(FUN), @function
G(FUN):
	pushl %edi
	pushl %esi
.if __ALL
	GET_GOT()
	NYDIN()
.endif
.if SF_DEBUG
	// NOTE(review): these two loads use PICSO whereas the argument loads
	// below use __PICSO; confirm that both macros exist and are intended.
	movl PICSO(12)(%esp), %eax	// _ba
	testl %eax, %eax
	jnz 1f
	ASSERT_CRASH("_ba != NIL")
1:
	movl PICSO(16)(%esp), %eax	// _bb
	testl %eax, %eax
	jnz 2f
	ASSERT_CRASH("_bb != NIL")
2:
.endif
	// load args (_ba, _bb, _bytes)
	movl __PICSO(12)(%esp), %esi
	movl __PICSO(16)(%esp), %edi
	movl __PICSO(20)(%esp), %ecx
	cld				// string ops must walk upwards
	xorl %edx, %edx			// default return: equal
	// A rep-prefixed compare with a zero count executes nothing and leaves
	// the flags of the preceding instruction in place, which used to turn
	// a zero-length compare into a bogus -1.  Leave early instead.
	testl %ecx, %ecx
	jz 9f
	cmpl $20, %ecx			// short input: plain byte loop
	jbe 7f				// unsigned branch, _bytes is a uir
1:	// bytewise until %esi (_ba) sits on a 4-byte boundary; %ecx > 20 here,
	// so the at most three decrements cannot exhaust the count
	testl $3, %esi
	jz 2f
	cmpsb
	jne 8f				// CF already describes the result
	decl %ecx
	jmp 1b
2:	// compare 32 bits at a time.  On a little endian machine the flags of
	// a 32-bit compare do not order the operands the way a bytewise compare
	// does, so this loop only tells us whether a slot differs.
	movl %ecx, %eax			// keep the byte count
	shrl $2, %ecx			// number of whole 32-bit slots (>= 4)
	repz cmpsl
	jne 3f
	movl %eax, %ecx			// all slots equal: fetch the byte count..
	andl $3, %ecx			// ..and keep the 0..3 trailing bytes
	jz 9f
	jmp 7f				// trailing bytes go to the byte loop
3:	// A slot differed.  cmpsl has already stepped past it, so move both
	// pointers back onto it and rescan exactly those four bytes; the byte
	// loop is guaranteed to stop on the differing byte within them.
	movl $4, %eax			// avoid immediate ops..
	subl %eax, %esi
	subl %eax, %edi
	movl %eax, %ecx
7:	// byte loop; %ecx is never 0 when we get here
	repz cmpsb
	je 9f
8:	// CF is set iff the byte of _ba is below the byte of _bb
	sbbl %edx, %edx			// edx = edx - edx - CF --> 0 or -1
	orb $1, %dl			// 0 becomes 1, -1 stays -1
9:	// and finalize
	movl %edx, %eax			// result is returned in eax
.if __ALL
	NYDOUT()
	UNGET_GOT()
.endif
	popl %esi
	popl %edi
	ret
.size	G(FUN), .-G(FUN)
.align	16
// /__XXXXXX_mem_Compare

--steffen
|
|Der Kragenbaer,                The moon bear,
|der holt sich munter           he cheerfully and one by one
|einen nach dem anderen runter  wa.ks himself off
|(By Robert Gernhardt)


Home | Main Index | Thread Index | Old Index