Port-sparc archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
bswap is slow on SPARC
Hio,
While benchmarking disk encryption/decryption on a 500 MHz
UltraSPARC IIe, riastradh and I noticed a bottleneck. On most
NetBSD architectures, GCC's __builtin_bswapX expands to fast inline
machine-dependent (MD) code. On SPARC, SPARC64, and VAX, it falls
back to a function call.
Encryption algorithms expect to be able to encode little-endian
integers in a tight loop, so the cost of a function call per swap adds
up to being *slow*. This matters even more now that NetBSD encrypts
swap by default.
By reusing the existing inline byte-swap macros and avoiding the
function call, I found that disk decryption throughput increased by as
much as 1 MiB/s (which matters when it's only about 4 MiB/s to begin with).
You can test this yourself by setting up a cgd on a vnd, for example:
# dd if=/dev/zero of=/testfile bs=1m count=1024
# vndconfig vnd0 /testfile
# cgdconfig -s cgd0 /dev/vnd0c adiantum 256 < /dev/urandom
# dd if=/dev/rcgd0 bs=512k | progress dd of=/dev/null bs=512k
Patch attached.
Index: sys/sys/bswap.h
===================================================================
RCS file: /cvsroot/src/sys/sys/bswap.h,v
retrieving revision 1.19
diff -u -p -r1.19 bswap.h
--- sys/sys/bswap.h 12 Mar 2015 15:28:16 -0000 1.19
+++ sys/sys/bswap.h 24 Nov 2025 13:21:43 -0000
@@ -24,20 +24,7 @@ __END_DECLS
#if defined(__GNUC__) && !defined(__lint__)
-/* machine/byte_swap.h might have defined inline versions */
-#ifndef __BYTE_SWAP_U64_VARIABLE
-#define __BYTE_SWAP_U64_VARIABLE bswap64
-#endif
-
-#ifndef __BYTE_SWAP_U32_VARIABLE
-#define __BYTE_SWAP_U32_VARIABLE bswap32
-#endif
-
-#ifndef __BYTE_SWAP_U16_VARIABLE
-#define __BYTE_SWAP_U16_VARIABLE bswap16
-#endif
-
-#define __byte_swap_u64_constant(x) \
+#define __byte_swap_u64(x) \
(__CAST(uint64_t, \
((((x) & 0xff00000000000000ull) >> 56) | \
(((x) & 0x00ff000000000000ull) >> 40) | \
@@ -48,29 +35,68 @@ __END_DECLS
(((x) & 0x000000000000ff00ull) << 40) | \
(((x) & 0x00000000000000ffull) << 56))))
-#define __byte_swap_u32_constant(x) \
+#define __byte_swap_u32(x) \
(__CAST(uint32_t, \
((((x) & 0xff000000) >> 24) | \
(((x) & 0x00ff0000) >> 8) | \
(((x) & 0x0000ff00) << 8) | \
(((x) & 0x000000ff) << 24))))
-#define __byte_swap_u16_constant(x) \
+#define __byte_swap_u16(x) \
(__CAST(uint16_t, \
((((x) & 0xff00) >> 8) | \
(((x) & 0x00ff) << 8))))
+/*
+ * The compiler always generates an expensive function call to bswap
+ * on some architectures, we want the inline versions there.
+ */
+#ifdef _BSWAP_IS_SLOW
+
+static __inline uint64_t __byte_swap_u64_inline(uint64_t x) {
+ return __byte_swap_u64(x);
+}
+
+static __inline uint32_t __byte_swap_u32_inline(uint32_t x) {
+ return __byte_swap_u32(x);
+}
+
+static __inline uint16_t __byte_swap_u16_inline(uint16_t x) {
+ return __byte_swap_u16(x);
+}
+
+#define __BYTE_SWAP_U64_VARIABLE __byte_swap_u64_inline
+#define __BYTE_SWAP_U32_VARIABLE __byte_swap_u32_inline
+#define __BYTE_SWAP_U16_VARIABLE __byte_swap_u16_inline
+
+#else
+
+/* allow machine/bswap.h to override these with inline versions */
+#ifndef __BYTE_SWAP_U64_VARIABLE
+#define __BYTE_SWAP_U64_VARIABLE bswap64
+#endif
+
+#ifndef __BYTE_SWAP_U32_VARIABLE
+#define __BYTE_SWAP_U32_VARIABLE bswap32
+#endif
+
+#ifndef __BYTE_SWAP_U16_VARIABLE
+#define __BYTE_SWAP_U16_VARIABLE bswap16
+#endif
+
+#endif /* _BSWAP_IS_SLOW */
+
#define bswap64(x) \
__CAST(uint64_t, __builtin_constant_p((x)) ? \
- __byte_swap_u64_constant(x) : __BYTE_SWAP_U64_VARIABLE(x))
+ __byte_swap_u64(x) : __BYTE_SWAP_U64_VARIABLE(x))
#define bswap32(x) \
__CAST(uint32_t, __builtin_constant_p((x)) ? \
- __byte_swap_u32_constant(x) : __BYTE_SWAP_U32_VARIABLE(x))
+ __byte_swap_u32(x) : __BYTE_SWAP_U32_VARIABLE(x))
#define bswap16(x) \
__CAST(uint16_t, __builtin_constant_p((x)) ? \
- __byte_swap_u16_constant(x) : __BYTE_SWAP_U16_VARIABLE(x))
+ __byte_swap_u16(x) : __BYTE_SWAP_U16_VARIABLE(x))
#endif /* __GNUC__ && !__lint__ */
#endif /* !_LOCORE */
Index: sys/arch/sparc/include/bswap.h
===================================================================
RCS file: /cvsroot/src/sys/arch/sparc/include/bswap.h,v
retrieving revision 1.2
diff -u -p -r1.2 bswap.h
--- sys/arch/sparc/include/bswap.h 21 Aug 1999 05:39:55 -0000 1.2
+++ sys/arch/sparc/include/bswap.h 24 Nov 2025 13:21:44 -0000
@@ -3,6 +3,12 @@
#ifndef _MACHINE_BSWAP_H_
#define _MACHINE_BSWAP_H_
+/*
+ * GCC fails to generate inline calls to bswapX on sparc and instead
+ * generates function calls.
+ */
+#define _BSWAP_IS_SLOW 1
+
#include <sys/bswap.h>
#endif /* !_MACHINE_BSWAP_H_ */
Index: sys/arch/sparc64/include/bswap.h
===================================================================
RCS file: /cvsroot/src/sys/arch/sparc64/include/bswap.h,v
retrieving revision 1.2
diff -u -p -r1.2 bswap.h
--- sys/arch/sparc64/include/bswap.h 21 Aug 1999 05:39:55 -0000 1.2
+++ sys/arch/sparc64/include/bswap.h 24 Nov 2025 13:21:44 -0000
@@ -3,6 +3,12 @@
#ifndef _MACHINE_BSWAP_H_
#define _MACHINE_BSWAP_H_
+/*
+ * GCC fails to generate inline calls to bswapX on sparc and instead
+ * generates function calls.
+ */
+#define _BSWAP_IS_SLOW 1
+
#include <sys/bswap.h>
#endif /* !_MACHINE_BSWAP_H_ */
Home |
Main Index |
Thread Index |
Old Index