Subject: Re: hton64
To: Perry E. Metzger <perry@piermont.com>
From: Todd Vierling <tv@pobox.com>
List: current-users
Date: 06/28/1997 18:57:34
On Sat, 28 Jun 1997, Perry E. Metzger wrote:

: > >better yet: create generic byte-in-Nbyteinteger swap functions.  Then
: > >define ntoh* and hton* to use them, if appropriate for the port.
: > 
: > Hear.  Hear.  Think of MD5 needing to byte swap on big-endian platforms.
: 
: Well, who is going to volunteer to spec the things and write them?

Here's a rough draft; feel free to pick it apart and to write
machine-dependent, optimised versions of these routines.  (An m68k
version--in Motorola asm format!--is attached.)  Comments?

=====

#include <machine/types.h>
#include <machine/endian.h>

/*
 * When swapping little/big endian, you truthfully could call _swap*()
 * directly.  But for completeness's sake, we have both directions as
 * separate macros; there _are_ processors out there with a '3412' and
 * '2143' byte order.
 */

/*
 * Host <-> fixed-byte-order conversion macros.
 *
 *   _le2hNN(x), _h2leNN(x)	little-endian <-> host, NN = 16, 32, 64
 *   _be2hNN(x), _h2beNN(x)	big-endian    <-> host, NN = 16, 32, 64
 *   htons, ntohs, htonl, ntohl	host <-> big-endian, 16 and 32 bit
 *
 * A conversion that matches the host byte order expands to its argument;
 * the opposite one calls _swapNN().  The pass-through expansions are
 * parenthesised so that an argument such as "a | b" or "a + b" keeps its
 * grouping when the macro is used inside a larger expression.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define _le2h16(x) (x)
#define _le2h32(x) (x)
#define _le2h64(x) (x)
#define _h2le16(x) (x)
#define _h2le32(x) (x)
#define _h2le64(x) (x)
#define _be2h16(x) _swap16(x)
#define _be2h32(x) _swap32(x)
#define _be2h64(x) _swap64(x)
#define _h2be16(x) _swap16(x)
#define _h2be32(x) _swap32(x)
#define _h2be64(x) _swap64(x)
#define htons(x) _swap16(x)
#define ntohs(x) _swap16(x)
#define htonl(x) _swap32(x)
#define ntohl(x) _swap32(x)
#elif BYTE_ORDER == BIG_ENDIAN
#define _le2h16(x) _swap16(x)
#define _le2h32(x) _swap32(x)
#define _le2h64(x) _swap64(x)
#define _h2le16(x) _swap16(x)
#define _h2le32(x) _swap32(x)
#define _h2le64(x) _swap64(x)
#define _be2h16(x) (x)
#define _be2h32(x) (x)
#define _be2h64(x) (x)
#define _h2be16(x) (x)
#define _h2be32(x) (x)
#define _h2be64(x) (x)
#define htons(x) (x)
#define ntohs(x) (x)
#define htonl(x) (x)
#define ntohl(x) (x)
#else
/* Only plain little-endian and big-endian hosts are handled here. */
#error huh?
#endif

/*
 * Byte-swap primitives.  _swap16/_swap32/_swap64 return their argument
 * with the bytes in reverse order; _swapbytes reverses a buffer of
 * 'num' bytes in place and returns the buffer pointer it was given.
 */
__BEGIN_DECLS
int16_t		_swap16(int16_t x);
int32_t		_swap32(int32_t x);
int64_t		_swap64(int64_t x);
u_int8_t	*_swapbytes(u_int8_t *x, int num);
__END_DECLS

#ifndef ASM_ROUTINES
/* carr(x): view the object x as an array of bytes (x must be an lvalue). */
#define carr(x) ((u_int8_t *)&x)

/*
 * _swap16 -- return x with its two bytes exchanged.
 */
int16_t _swap16(int16_t x) {
	int16_t swapped;
	const u_int8_t *src = carr(x);
	u_int8_t *dst = carr(swapped);
	int i;

	/* Byte i of the result is byte (1 - i) of the argument. */
	for (i = 0; i < 2; i++) {
		dst[i] = src[1 - i];
	}
	return swapped;
}

/*
 * _swap32 -- return x with its four bytes in reverse order.
 *
 * Two equivalent bodies are provided; define NO_LONG_CHARSWAP to select
 * the loop instead of the unrolled byte moves.  The loop is index based:
 * a down-counting pointer would have to step to one element before x to
 * terminate, and C leaves that pointer arithmetic undefined.
 */
int32_t _swap32(int32_t x) {
	const u_int8_t *src = (const u_int8_t *)&x;
	int32_t t;
	u_int8_t *dst = (u_int8_t *)&t;
#ifdef NO_LONG_CHARSWAP
	register int i;

	for (i = 0; i < 4; i++) {
		dst[i] = src[3 - i];
	}
#else
	/* This can be faster on some systems. */
	dst[0] = src[3];
	dst[1] = src[2];
	dst[2] = src[1];
	dst[3] = src[0];
#endif
	return t;
}

/*
 * _swap64 -- return x with its eight bytes in reverse order.
 *
 * Two equivalent bodies are provided; define NO_QUAD_CHARSWAP to select
 * the loop instead of the unrolled byte moves.  The loop is index based:
 * a down-counting pointer would have to step to one element before x to
 * terminate, and C leaves that pointer arithmetic undefined.
 */
int64_t _swap64(int64_t x) {
	const u_int8_t *src = (const u_int8_t *)&x;
	int64_t t;
	u_int8_t *dst = (u_int8_t *)&t;
#ifdef NO_QUAD_CHARSWAP
	register int i;

	for (i = 0; i < 8; i++) {
		dst[i] = src[7 - i];
	}
#else
	/* This can be faster on some systems--well, a long shot, but.... */
	dst[0] = src[7];
	dst[1] = src[6];
	dst[2] = src[5];
	dst[3] = src[4];
	dst[4] = src[3];
	dst[5] = src[2];
	dst[6] = src[1];
	dst[7] = src[0];
#endif
	return t;
}

/*
 * _swapbytes -- reverse, in place, the 'num' bytes starting at 'x'.
 *
 * This is the size-independent counterpart of the loop variants of
 * _swap32() and _swap64().  It walks inward from both ends of the
 * buffer, so 'num' may be even or odd (the middle byte of an odd-sized
 * buffer stays where it is).  A 'num' below 2 leaves the buffer
 * untouched.  No pointer or index outside x[0 .. num-1] is ever formed.
 *
 * Returns x.
 */

u_int8_t *_swapbytes(u_int8_t *x, int num) {
	register int lo, hi;
	register u_int8_t r;

	/* Nothing to exchange; also keeps 'num - 1' from overflowing. */
	if (num < 2) {
		return x;
	}

	for (lo = 0, hi = num - 1; lo < hi; lo++, hi--) {
		r = x[lo];
		x[lo] = x[hi];
		x[hi] = r;
	}
	return x;
}
#endif /* !ASM_ROUTINES */

=====

; Motorola format m68k assembler variations of __swap16(), __swap32(),
; __swap64(), and __swapbytes().  Sorry this isn't MIT style; this is an
; on the spot mental compile and optimization, and I don't know MIT syntax.
; :>

	code			; presumably selects the code section -- confirm for your assembler

	; Export the four entry points defined below.
	xdef	__swapbytes
	xdef	__swap16
	xdef	__swap32
	xdef	__swap64

; __swapbytes(x, num): reverse, in place, the num bytes starting at x.
; x is read from 4(sp) and num from 8(sp); x is left in d0 on return.
; a0 holds (p + 1) and walks down from the middle of the buffer, a1
; holds q and walks up from it.  For an odd num the middle byte is
; skipped, so the buffer is still reversed end to end.
__swapbytes:
	move.l	4(sp),a0	; get x
	move.l	a0,d0		; save x (loop bound and value left in d0)
	move.l	8(sp),d1	; get num
	lea	0(a0,d1.l),a1	; a1 = x + num
	lsr.l	#1,d1		; divide num by 2
	suba.l	d1,a1		; get q = x + num - num / 2 in a1
	adda.l	d1,a0		; get (p + 1) = x + num / 2 in a0
swbloop:
	cmpa.l	d0,a0		; (p + 1) <= x?
	bls.b	swbexit		; if so, exit (unsigned test: these are addresses)
	move.b	-(a0),d1	; r = *--(p + 1)
	move.b	(a1),(a0)	; *p = *q
	move.b	d1,(a1)+	; *q++ = r
	bra.b	swbloop		; check again
swbexit:
	rts

; This method of injecting bytes into the registers may be a good
; optimisation of these functions on more than the m68k platform.  The C
; variations above do not propose this possibility, only byte moves.

; __swap16: build the byte-swapped 16 bit value in d0 from the two
; argument bytes at 6(sp) and 7(sp).  The upper 16 bits of d0 end up zero.
__swap16:
	moveq	#0,d0		; clear d0
	move.b	7(sp),d0	; get carr(x)[1] into bits 0-7
	lsl.l	#8,d0		; move it up to bits 8-15
	move.b	6(sp),d0	; get carr(x)[0] into bits 0-7
	rts

; __swap32: build the byte-swapped 32 bit value in d0 from the four
; argument bytes at 4(sp)..7(sp).  d0 is not cleared first: the three
; 8 bit shifts push whatever was in its upper 24 bits out of the register.
__swap32:
	move.b	7(sp),d0	; get carr(x)[3]
	lsl.l	#8,d0		; shift up one byte
	move.b	6(sp),d0	; get carr(x)[2]
	lsl.l	#8,d0		; shift up one byte
	move.b	5(sp),d0	; get carr(x)[1]
	lsl.l	#8,d0		; shift up one byte
	move.b	4(sp),d0	; get carr(x)[0]
	rts

; I'm not quite sure of gcc's passing of 64 bit ints, but I believe this
; is correct.  (64 bit ints passed on the stack, and d0/d1 as the result.)
; This assumes d0 carries the most significant long of the result and d1
; the least significant one -- verify against your compiler.  Bytes 7..4
; of x become the most significant long of the swapped value, so they
; are assembled in d0; bytes 3..0 are assembled in d1.

__swap64:
	move.b	11(sp),d0	; get carr(x)[7]
	lsl.l	#8,d0		; shift up one byte
	move.b	10(sp),d0	; get carr(x)[6]
	lsl.l	#8,d0		; shift up one byte
	move.b	9(sp),d0	; get carr(x)[5]
	lsl.l	#8,d0		; shift up one byte
	move.b	8(sp),d0	; get carr(x)[4]
	move.b	7(sp),d1	; get carr(x)[3]
	lsl.l	#8,d1		; shift up one byte
	move.b	6(sp),d1	; get carr(x)[2]
	lsl.l	#8,d1		; shift up one byte
	move.b	5(sp),d1	; get carr(x)[1]
	lsl.l	#8,d1		; shift up one byte
	move.b	4(sp),d1	; get carr(x)[0]
	rts

=====
== Todd Vierling (Personal tv@pobox.com; Business tv@iag.net) Foo-bar-baz! ==
== System administrator/technician, Internet Access Group, Orlando Florida ==
== Dialups in Orange, Volusia, Lake, Osceola counties - http://www.iag.net ==