Source-Changes-HG archive


[src/trunk]: src/sys/arch x86: Add kthread_fpu_enter/exit support, take two.



details:   https://anonhg.NetBSD.org/src/rev/b5cb4674fcae
branches:  trunk
changeset: 373749:b5cb4674fcae
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Sat Feb 25 18:04:42 2023 +0000

description:
x86: Add kthread_fpu_enter/exit support, take two.

This time, make sure to restore the FPU state when switching to a
kthread in the middle of kthread_fpu_enter/exit.

This adds a single predicted-taken branch for the case of kthreads
that are not in kthread_fpu_enter/exit, so it incurs a penalty only
for threads that actually use it.  Since it avoids FPU state
switching in kthreads that do use the FPU, namely cgd worker threads,
this should be a net performance win on systems that use it, and
should have negligible impact otherwise.

XXX pullup-10
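
For illustration, here is a hypothetical worker kthread bracketing its
FPU-using work with the MI interface, modeled loosely on how a
cgd(4)-style worker could use it.  This assumes the cookie-style pair
int kthread_fpu_enter(void) / void kthread_fpu_exit(int) from
kthread(9); every other name (example_*) is made up for the sketch:

	static void
	example_worker(void *arg)
	{
		struct example_softc *sc = arg;		/* hypothetical */
		int s;

		/*
		 * Mark this kthread as an FPU user for the duration of
		 * the loop, so its FPU state survives sleeps and context
		 * switches instead of requiring fpu_kern_enter/leave
		 * around every individual operation.
		 */
		s = kthread_fpu_enter();
		for (;;) {
			struct example_job *job;

			job = example_nextjob(sc);	/* hypothetical */
			if (job == NULL)
				break;
			example_crypt(job);	/* may use SSE/AVX freely */
		}
		kthread_fpu_exit(s);
		kthread_exit(0);
	}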

diffstat:

 sys/arch/amd64/amd64/genassym.cf |   3 +-
 sys/arch/amd64/amd64/locore.S    |  19 ++++++++-
 sys/arch/x86/x86/fpu.c           |  82 +++++++++++++++++++++++++++++++++------
 3 files changed, 87 insertions(+), 17 deletions(-)

diffs (231 lines):

diff -r 1a218c656fd8 -r b5cb4674fcae sys/arch/amd64/amd64/genassym.cf
--- a/sys/arch/amd64/amd64/genassym.cf  Sat Feb 25 18:04:25 2023 +0000
+++ b/sys/arch/amd64/amd64/genassym.cf  Sat Feb 25 18:04:42 2023 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: genassym.cf,v 1.93 2022/12/27 08:40:40 msaitoh Exp $
+#      $NetBSD: genassym.cf,v 1.94 2023/02/25 18:04:42 riastradh Exp $
 
 #
 # Copyright (c) 1998, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -166,6 +166,7 @@
 define L_MD_ASTPENDING         offsetof(struct lwp, l_md.md_astpending)
 
 define LW_SYSTEM               LW_SYSTEM
+define LW_SYSTEM_FPU           LW_SYSTEM_FPU
 define MDL_IRET                MDL_IRET
 define MDL_COMPAT32            MDL_COMPAT32
 define MDL_FPU_IN_CPU          MDL_FPU_IN_CPU
diff -r 1a218c656fd8 -r b5cb4674fcae sys/arch/amd64/amd64/locore.S
--- a/sys/arch/amd64/amd64/locore.S     Sat Feb 25 18:04:25 2023 +0000
+++ b/sys/arch/amd64/amd64/locore.S     Sat Feb 25 18:04:42 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: locore.S,v 1.215 2022/12/26 17:46:00 christos Exp $    */
+/*     $NetBSD: locore.S,v 1.216 2023/02/25 18:04:42 riastradh Exp $   */
 
 /*
  * Copyright-o-rama!
@@ -1247,7 +1247,7 @@
 
        /* Don't bother with the rest if switching to a system process. */
        testl   $LW_SYSTEM,L_FLAG(%r12)
-       jnz     .Lswitch_return
+       jnz     .Lswitch_system
 
        /* Is this process using RAS (restartable atomic sequences)? */
        movq    L_PROC(%r12),%rdi
@@ -1336,6 +1336,21 @@
        popq    %r12
        popq    %rbx
        ret
+
+.Lswitch_system:
+       /*
+        * If it has LWP_SYSTEM_FPU set, meaning it's running in
+        * kthread_fpu_enter/exit, we need to restore the FPU state
+        * and enable FPU instructions with fpu_handle_deferred.
+        *
+        * No need to test MDL_FPU_IN_CPU via HANDLE_DEFERRED_FPU --
+        * fpu_switch guarantees it is clear, so we can just call
+        * fpu_handle_deferred unconditionally.
+        */
+       testl   $LW_SYSTEM_FPU,L_FLAG(%r12)
+       jz      .Lswitch_return
+       callq   _C_LABEL(fpu_handle_deferred)
+       jmp     .Lswitch_return
 END(cpu_switchto)
 
 /*
diff -r 1a218c656fd8 -r b5cb4674fcae sys/arch/x86/x86/fpu.c
--- a/sys/arch/x86/x86/fpu.c    Sat Feb 25 18:04:25 2023 +0000
+++ b/sys/arch/x86/x86/fpu.c    Sat Feb 25 18:04:42 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: fpu.c,v 1.81 2023/02/25 18:04:25 riastradh Exp $       */
+/*     $NetBSD: fpu.c,v 1.82 2023/02/25 18:04:42 riastradh Exp $       */
 
 /*
  * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.  All
@@ -96,7 +96,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.81 2023/02/25 18:04:25 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.82 2023/02/25 18:04:42 riastradh Exp $");
 
 #include "opt_multiprocessor.h"
 
@@ -107,6 +107,7 @@
 #include <sys/file.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
+#include <sys/kthread.h>
 #include <sys/sysctl.h>
 #include <sys/xcall.h>
 
@@ -131,13 +132,35 @@
 
 uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
 
+/*
+ * True if this a thread that is allowed to use the FPU -- either a
+ * user thread, or a system thread with LW_SYSTEM_FPU enabled.
+ */
+static inline bool
+lwp_can_haz_fpu(struct lwp *l)
+{
+
+       return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) != LW_SYSTEM;
+}
+
+/*
+ * True if this is a system thread with its own private FPU state.
+ */
+static inline bool
+lwp_system_fpu_p(struct lwp *l)
+{
+
+       return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
+           (LW_SYSTEM|LW_SYSTEM_FPU);
+}
+
 static inline union savefpu *
 fpu_lwp_area(struct lwp *l)
 {
        struct pcb *pcb = lwp_getpcb(l);
        union savefpu *area = &pcb->pcb_savefpu;
 
-       KASSERT((l->l_flag & LW_SYSTEM) == 0);
+       KASSERT(lwp_can_haz_fpu(l));
        if (l == curlwp) {
                fpu_save();
        }
@@ -155,7 +178,7 @@
 
        s = splvm();
        if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
-               KASSERT((l->l_flag & LW_SYSTEM) == 0);
+               KASSERT(lwp_can_haz_fpu(l));
                fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & PK_32));
                l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
        }
@@ -314,7 +337,7 @@
            cpu_index(ci), ci->ci_ilevel);
 
        if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
-               KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
+               KASSERT(lwp_can_haz_fpu(oldlwp));
                pcb = lwp_getpcb(oldlwp);
                fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features,
                    !(oldlwp->l_proc->p_flag & PK_32));
@@ -330,11 +353,11 @@
        union savefpu *fpu_save;
 
        /* Kernel threads have no FPU. */
-       if (__predict_false(l2->l_flag & LW_SYSTEM)) {
+       if (__predict_false(!lwp_can_haz_fpu(l2))) {
                return;
        }
        /* For init(8). */
-       if (__predict_false(l1->l_flag & LW_SYSTEM)) {
+       if (__predict_false(!lwp_can_haz_fpu(l1))) {
                memset(&pcb2->pcb_savefpu, 0, x86_fpu_save_size);
                return;
        }
@@ -358,6 +381,13 @@
 
 /* -------------------------------------------------------------------------- */
 
+static const union savefpu safe_fpu __aligned(64) = {
+       .sv_xmm = {
+               .fx_mxcsr = __SAFE_MXCSR__,
+       },
+};
+static const union savefpu zero_fpu __aligned(64);
+
 /*
  * fpu_kern_enter()
  *
@@ -373,15 +403,15 @@
 void
 fpu_kern_enter(void)
 {
-       static const union savefpu safe_fpu __aligned(64) = {
-               .sv_xmm = {
-                       .fx_mxcsr = __SAFE_MXCSR__,
-               },
-       };
        struct lwp *l = curlwp;
        struct cpu_info *ci;
        int s;
 
+       if (lwp_system_fpu_p(l) && !cpu_intr_p()) {
+               KASSERT(!cpu_softintr_p());
+               return;
+       }
+
        s = splvm();
 
        ci = curcpu();
@@ -427,10 +457,16 @@
 void
 fpu_kern_leave(void)
 {
-       static const union savefpu zero_fpu __aligned(64);
-       struct cpu_info *ci = curcpu();
+       struct cpu_info *ci;
        int s;
 
+       if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
+               KASSERT(!cpu_softintr_p());
+               return;
+       }
+
+       ci = curcpu();
+
 #if 0
        /*
         * Can't assert this because if the caller holds a spin lock at
@@ -459,6 +495,24 @@
        splx(s);
 }
 
+void
+kthread_fpu_enter_md(void)
+{
+
+       /* Enable the FPU by clearing CR0_TS, and enter a safe FPU state.  */
+       clts();
+       fpu_area_restore(&safe_fpu, x86_xsave_features, /*is_64bit*/false);
+}
+
+void
+kthread_fpu_exit_md(void)
+{
+
+       /* Zero the FPU state and disable the FPU by setting CR0_TS.  */
+       fpu_area_restore(&zero_fpu, x86_xsave_features, /*is_64bit*/false);
+       stts();
+}
+
 /* -------------------------------------------------------------------------- */
 
 /*
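
For context (not part of this diff), the MI wrappers presumably set and
clear LW_SYSTEM_FPU around the MD hooks added above.  A rough sketch,
assuming a cookie-style interface and the usual lwp_lock protocol; the
actual code in kern_kthread.c may differ:

	int
	kthread_fpu_enter(void)
	{
		struct lwp *l = curlwp;
		int s;

		KASSERT(l->l_flag & LW_SYSTEM);

		/* Remember whether the flag was already set (nesting). */
		kpreempt_disable();
		s = l->l_flag & LW_SYSTEM_FPU;
		if (!s) {
			lwp_lock(l);
			l->l_flag |= LW_SYSTEM_FPU;
			lwp_unlock(l);
			kthread_fpu_enter_md();	/* clts() + safe FPU state */
		}
		kpreempt_enable();

		return s;
	}

	void
	kthread_fpu_exit(int s)
	{
		struct lwp *l = curlwp;

		KASSERT(s == (s & LW_SYSTEM_FPU));

		kpreempt_disable();
		if (!s) {
			kthread_fpu_exit_md();	/* zero FPU state + stts() */
			lwp_lock(l);
			l->l_flag &= ~LW_SYSTEM_FPU;
			lwp_unlock(l);
		}
		kpreempt_enable();
	}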


