Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/crypto/aes/arch/arm Add 32-bit load, store, and shift in...



details:   https://anonhg.NetBSD.org/src/rev/ec209052d3b9
branches:  trunk
changeset: 1012196:ec209052d3b9
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Sat Jul 25 22:43:01 2020 +0000

description:
Add 32-bit load, store, and shift intrinsics.

vld1q_u32
vst1q_u32
vshlq_n_u32
vshrq_n_u32

diffstat:

 sys/crypto/aes/arch/arm/arm_neon.h |  80 +++++++++++++++++++++++++++++++++++++-
 1 files changed, 79 insertions(+), 1 deletions(-)

diffs (108 lines):

diff -r cc60904b880c -r ec209052d3b9 sys/crypto/aes/arch/arm/arm_neon.h
--- a/sys/crypto/aes/arch/arm/arm_neon.h        Sat Jul 25 22:42:31 2020 +0000
+++ b/sys/crypto/aes/arch/arm/arm_neon.h        Sat Jul 25 22:43:01 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: arm_neon.h,v 1.5 2020/07/25 22:42:31 riastradh Exp $   */
+/*     $NetBSD: arm_neon.h,v 1.6 2020/07/25 22:43:01 riastradh Exp $   */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -222,6 +222,30 @@
 #endif
 
 _INTRINSATTR
+static __inline uint32x4_t
+vld1q_u32(const uint32_t *__p32) /* load a uint32x4_t vector from __p32 */
+{
+#if defined(__GNUC__) && !defined(__clang__) /* GCC builtins */
+#ifdef __aarch64__
+       const __builtin_aarch64_simd_si *__p =
+           (const __builtin_aarch64_simd_si *)__p32;
+
+       return (uint32x4_t)__builtin_aarch64_ld1v4si(__p);
+#else /* !__aarch64__ */
+       const __builtin_neon_si *__p = (const __builtin_neon_si *)__p32;
+
+       return (uint32x4_t)__builtin_neon_vld1v4si(__p);
+#endif /* __aarch64__ */
+#elif defined(__clang__) /* 50: presumably clang's type code for uint32x4_t -- confirm */
+       uint32x4_t __v = (uint32x4_t)__builtin_neon_vld1q_v(__p32, 50);
+#ifndef __LITTLE_ENDIAN__ /* big-endian: reverse the four lanes after loading */
+       __v = __builtin_shufflevector(__v, __v, 3,2,1,0);
+#endif /* !__LITTLE_ENDIAN__ */
+       return __v;
+#endif /* GCC vs. clang */
+}
+
+_INTRINSATTR
 static __inline uint8x16_t
 vld1q_u8(const uint8_t *__p8)
 {
@@ -383,6 +407,38 @@
 
 #if defined(__GNUC__) && !defined(__clang__) /* GCC: inline function */
 _INTRINSATTR
+static __inline uint32x4_t
+vshlq_n_u32(uint32x4_t __v, uint8_t __bits) /* shift __v left by __bits */
+{
+#ifdef __aarch64__
+       return (uint32x4_t)__builtin_aarch64_ashlv4si((int32x4_t)__v, __bits);
+#else /* !__aarch64__ */
+       return (uint32x4_t)__builtin_neon_vshl_nv4si((int32x4_t)__v, __bits);
+#endif /* __aarch64__ */
+}
+#elif defined(__clang__) /* clang: macro; 50 is presumably the uint32x4_t type code -- confirm */
+#define        vshlq_n_u32(__v, __bits)                                              \
+       (uint32x4_t)__builtin_neon_vshlq_n_v((int32x4_t)(__v), (__bits), 50)
+#endif /* GCC vs. clang */
+
+#if defined(__GNUC__) && !defined(__clang__) /* GCC: inline function */
+_INTRINSATTR
+static __inline uint32x4_t
+vshrq_n_u32(uint32x4_t __v, uint8_t __bits) /* shift __v right by __bits */
+{
+#ifdef __aarch64__
+       return (uint32x4_t)__builtin_aarch64_lshrv4si((int32x4_t)__v, __bits);
+#else /* !__aarch64__ */
+       return (uint32x4_t)__builtin_neon_vshru_nv4si((int32x4_t)__v, __bits);
+#endif /* __aarch64__ */
+}
+#elif defined(__clang__) /* clang: macro carrying the same name as the GCC function */
+#define        vshrq_n_u32(__v, __bits)                                              \
+       (uint32x4_t)__builtin_neon_vshrq_n_v((int32x4_t)(__v), (__bits), 50)
+#endif /* GCC vs. clang */
+
+#if defined(__GNUC__) && !defined(__clang__)
+_INTRINSATTR
 static __inline uint8x16_t
 vshrq_n_u8(uint8x16_t __v, uint8_t __bits)
 {
@@ -432,6 +488,28 @@
 
 _INTRINSATTR
 static __inline void
+vst1q_u32(uint32_t *__p32, uint32x4_t __v) /* store the vector __v at __p32 */
+{
+#if defined(__GNUC__) && !defined(__clang__) /* GCC builtins */
+#ifdef __aarch64__
+       __builtin_aarch64_simd_si *__p = (__builtin_aarch64_simd_si *)__p32;
+
+       __builtin_aarch64_st1v4si(__p, (int32x4_t)__v);
+#else /* !__aarch64__ */
+       __builtin_neon_si *__p = (__builtin_neon_si *)__p32;
+
+       __builtin_neon_vst1v4si(__p, (int32x4_t)__v);
+#endif /* __aarch64__ */
+#elif defined(__clang__) /* 50: presumably clang's type code for uint32x4_t -- confirm */
+#ifndef __LITTLE_ENDIAN__ /* big-endian: reverse the four lanes before storing */
+       __v = __builtin_shufflevector(__v, __v, 3,2,1,0);
+#endif /* !__LITTLE_ENDIAN__ */
+       __builtin_neon_vst1q_v(__p32, __v, 50);
+#endif /* GCC vs. clang */
+}
+
+_INTRINSATTR
+static __inline void
 vst1q_u8(uint8_t *__p8, uint8x16_t __v)
 {
 #if defined(__GNUC__) && !defined(__clang__)



Home | Main Index | Thread Index | Old Index