Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/crypto/aes/arch/x86 Add some Intel intrinsics for ChaCha.



details:   https://anonhg.NetBSD.org/src/rev/22516c0a80c4
branches:  trunk
changeset: 936330:22516c0a80c4
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Sat Jul 25 22:45:10 2020 +0000

description:
Add some Intel intrinsics for ChaCha.

_mm_load1_ps
_mm_loadu_si128
_mm_movelh_ps
_mm_slli_epi32
_mm_storeu_si128
_mm_unpackhi_epi32
_mm_unpacklo_epi32

diffstat:

 sys/crypto/aes/arch/x86/immintrin.h |  70 +++++++++++++++++++++++++++++++++++-
 1 file changed, 67 insertions(+), 3 deletions(-)

diffs (112 lines):

diff -r b9309887f90f -r 22516c0a80c4 sys/crypto/aes/arch/x86/immintrin.h
--- a/sys/crypto/aes/arch/x86/immintrin.h       Sat Jul 25 22:44:32 2020 +0000
+++ b/sys/crypto/aes/arch/x86/immintrin.h       Sat Jul 25 22:45:10 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: immintrin.h,v 1.4 2020/07/25 22:44:32 riastradh Exp $  */
+/*     $NetBSD: immintrin.h,v 1.5 2020/07/25 22:45:10 riastradh Exp $  */
 
 /*-
  * Copyright (c) 2020 The NetBSD Foundation, Inc.
@@ -103,6 +103,20 @@
 #endif
 
 _INTRINSATTR
+static __inline __m128
+_mm_load1_ps(const float *__p)
+{
+       return __extension__ (__m128)(__v4sf) { *__p, *__p, *__p, *__p };
+}
+
+_INTRINSATTR
+static __inline __m128i
+_mm_loadu_si128(const __m128i_u *__p)
+{
+       return ((const struct { __m128i_u __v; } _PACKALIAS *)__p)->__v;
+}
+
+_INTRINSATTR
 static __inline __m128i
 _mm_loadu_si32(const void *__p)
 {
@@ -132,8 +146,18 @@
 #if defined(__GNUC__) && !defined(__clang__)
        return (__m128)__builtin_ia32_movhlps((__v4sf)__v0, (__v4sf)__v1);
 #elif defined(__clang__)
-       return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1,
-           6, 7, 2, 3);
+       return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1, 6,7,2,3);
+#endif
+}
+
+_INTRINSATTR
+static __inline __m128
+_mm_movelh_ps(__m128 __v0, __m128 __v1)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+       return (__m128)__builtin_ia32_movlhps((__v4sf)__v0, (__v4sf)__v1);
+#elif defined(__clang__)
+       return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1, 0,1,4,5);
 #endif
 }
 
@@ -205,6 +229,13 @@
 
 _INTRINSATTR
 static __inline __m128i
+_mm_slli_epi32(__m128i __v, uint8_t __bits)
+{
+       return (__m128i)__builtin_ia32_pslldi128((__v4si)__v, (int)__bits);
+}
+
+_INTRINSATTR
+static __inline __m128i
 _mm_slli_epi64(__m128i __v, uint8_t __bits)
 {
        return (__m128i)__builtin_ia32_psllqi128((__v2di)__v, (int)__bits);
@@ -245,6 +276,13 @@
 
 _INTRINSATTR
 static __inline void
+_mm_storeu_si128(__m128i_u *__p, __m128i __v)
+{
+       ((struct { __m128i_u __v; } _PACKALIAS *)__p)->__v = __v;
+}
+
+_INTRINSATTR
+static __inline void
 _mm_storeu_si32(void *__p, __m128i __v)
 {
        ((struct { int32_t __v; } _PACKALIAS *)__p)->__v = ((__v4si)__v)[0];
@@ -273,6 +311,32 @@
 
 _INTRINSATTR
 static __inline __m128i
+_mm_unpackhi_epi32(__m128i __lo, __m128i __hi)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+       return (__m128i)__builtin_ia32_punpckhdq128((__v4si)__lo,
+           (__v4si)__hi);
+#elif defined(__clang__)
+       return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi,
+           2,6,3,7);
+#endif
+}
+
+_INTRINSATTR
+static __inline __m128i
+_mm_unpacklo_epi32(__m128i __lo, __m128i __hi)
+{
+#if defined(__GNUC__) && !defined(__clang__)
+       return (__m128i)__builtin_ia32_punpckldq128((__v4si)__lo,
+           (__v4si)__hi);
+#elif defined(__clang__)
+       return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi,
+           0,4,1,5);
+#endif
+}
+
+_INTRINSATTR
+static __inline __m128i
 _mm_unpacklo_epi64(__m128i __lo, __m128i __hi)
 {
 #if defined(__GNUC__) && !defined(__clang__)



Home | Main Index | Thread Index | Old Index