Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/arch x86: Sprinkle extensive commentary about %fs/%gs in...



details:   https://anonhg.NetBSD.org/src/rev/83c56d4eef41
branches:  trunk
changeset: 377545:83c56d4eef41
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Sun Jul 16 19:55:43 2023 +0000

description:
x86: Sprinkle extensive commentary about %fs/%gs initialization.

Plus some other side quests like the three-stage GDT metamorphosis
lifecycle.

No functional change intended.

diffstat:

 sys/arch/amd64/amd64/locore.S  |  16 ++++++-
 sys/arch/amd64/amd64/machdep.c |  23 +++++++++-
 sys/arch/i386/i386/gdt.c       |  30 +++++++++++-
 sys/arch/i386/i386/locore.S    |  29 ++++++++++++-
 sys/arch/i386/i386/machdep.c   |  81 ++++++++++++++++++++++++++++++++++-
 sys/arch/x86/x86/cpu.c         |  93 +++++++++++++++++++++++++++++++++++++++++-
 sys/arch/x86/x86/pmap.c        |  18 ++++++-
 7 files changed, 272 insertions(+), 18 deletions(-)

diffs (truncated from 482 to 300 lines):

diff -r 4232ec27ee27 -r 83c56d4eef41 sys/arch/amd64/amd64/locore.S
--- a/sys/arch/amd64/amd64/locore.S     Sun Jul 16 19:09:07 2023 +0000
+++ b/sys/arch/amd64/amd64/locore.S     Sun Jul 16 19:55:43 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: locore.S,v 1.222 2023/06/24 05:31:04 msaitoh Exp $     */
+/*     $NetBSD: locore.S,v 1.223 2023/07/16 19:55:43 riastradh Exp $   */
 
 /*
  * Copyright-o-rama!
@@ -974,6 +974,20 @@ longmode_hi:
 
        movl    $VM_GUEST_XENPV, _C_LABEL(vm_guest)
 
+       /*
+        * Initialize cpu_info_primary.ci_self := &cpu_info_primary,
+        * and initialize some MSRs with
+        * cpu_init_msrs(&cpu_info_primary, full=true).  This sets up
+        * SYSCALL/SYSRET (XXX why?) and %fs/%gs, which is needed for
+        * the %gs-relative addressing used by CPUVAR(...), curcpu(),
+        * and curlwp.
+        *
+        * XXX Is it necessary to set cpu_info_primary.ci_self here?
+        * Isn't it statically initialized in x86/cpu.c?
+        *
+        * XXX Why do we immediately clear the segment registers just
+        * afterward?
+        */
        movq    $cpu_info_primary,%rdi
        movq    %rdi,CPU_INFO_SELF(%rdi) /* ci->ci_self = ci */
        movq    $1,%rsi
diff -r 4232ec27ee27 -r 83c56d4eef41 sys/arch/amd64/amd64/machdep.c
--- a/sys/arch/amd64/amd64/machdep.c    Sun Jul 16 19:09:07 2023 +0000
+++ b/sys/arch/amd64/amd64/machdep.c    Sun Jul 16 19:55:43 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: machdep.c,v 1.366 2022/10/26 23:38:06 riastradh Exp $  */
+/*     $NetBSD: machdep.c,v 1.367 2023/07/16 19:55:43 riastradh Exp $  */
 
 /*
  * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
@@ -110,7 +110,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.366 2022/10/26 23:38:06 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.367 2023/07/16 19:55:43 riastradh Exp $");
 
 #include "opt_modular.h"
 #include "opt_user_ldt.h"
@@ -1721,7 +1721,26 @@ init_x86_64(paddr_t first_avail)
 #ifdef SVS
        svs_init();
 #endif
+
+       /*
+        * Initialize MSRs on cpu0:
+        *
+        * - Enables SYSCALL/SYSRET.
+        *
+        * - Sets up %fs and %gs so that %gs points to the current
+        *   struct cpu_info as needed for CPUVAR(...), curcpu(), and
+        *   curlwp.
+        *
+        * - Enables the no-execute bit if supported.
+        *
+        * Thus, after this point, CPUVAR(...), curcpu(), and curlwp
+        * will work on cpu0.
+        *
+        * Note: The call to cpu_init_msrs for secondary CPUs happens
+        * in cpu_hatch.
+        */
        cpu_init_msrs(&cpu_info_primary, true);
+
 #ifndef XENPV
        cpu_speculation_init(&cpu_info_primary);
 #endif
diff -r 4232ec27ee27 -r 83c56d4eef41 sys/arch/i386/i386/gdt.c
--- a/sys/arch/i386/i386/gdt.c  Sun Jul 16 19:09:07 2023 +0000
+++ b/sys/arch/i386/i386/gdt.c  Sun Jul 16 19:55:43 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: gdt.c,v 1.73 2022/08/20 23:48:50 riastradh Exp $       */
+/*     $NetBSD: gdt.c,v 1.74 2023/07/16 19:55:43 riastradh Exp $       */
 
 /*
  * Copyright (c) 1996, 1997, 2009 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.73 2022/08/20 23:48:50 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: gdt.c,v 1.74 2023/07/16 19:55:43 riastradh Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_xen.h"
@@ -115,8 +115,30 @@ setgdt(int slot, const void *base, size_
 #endif
 
 /*
- * Initialize the GDT. We already have a gdtstore, which was temporarily used
- * by the bootstrap code. Now, we allocate a new gdtstore, and put it in cpu0.
+ * gdt_init()
+ *
+ *     Create a permanent Global Descriptor Table (GDT) for the
+ *     primary CPU.  This replaces the second temporary GDT that was
+ *     allocated in pmap_bootstrap with pmap_bootstrap_valloc and
+ *     pmap_bootstrap_palloc -- which in turn replaced the initial
+ *     temporary GDT allocated on the stack early at boot and
+ *     initialized with initgdt.
+ *
+ *     1. Allocate permanent space for the primary CPU's GDT with
+ *        uvm_km(9).
+ *
+ *     2. Copy the temporary GDT's contents over.  See initgdt for the
+ *        original initialization; it was copied from the initial
+ *        temporary GDT to the second temporary GDT in init386.
+ *
+ *     3. Make sure the GCPU_SEL segment descriptor points to
+ *        &cpu_info_primary.
+ *
+ *        XXX Is this necessary?  It appears to be redundant with
+ *        initgdt.
+ *
+ *     4. Load the permanent GDT address into the Global Descriptor
+ *        Table Register (GDTR) with LGDT (via gdt_init_cpu).
  */
 void
 gdt_init(void)
diff -r 4232ec27ee27 -r 83c56d4eef41 sys/arch/i386/i386/locore.S
--- a/sys/arch/i386/i386/locore.S       Sun Jul 16 19:09:07 2023 +0000
+++ b/sys/arch/i386/i386/locore.S       Sun Jul 16 19:55:43 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: locore.S,v 1.195 2023/05/14 09:05:38 riastradh Exp $   */
+/*     $NetBSD: locore.S,v 1.196 2023/07/16 19:55:43 riastradh Exp $   */
 
 /*
  * Copyright-o-rama!
@@ -128,7 +128,7 @@
  */
 
 #include <machine/asm.h>
-__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.195 2023/05/14 09:05:38 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.196 2023/07/16 19:55:43 riastradh Exp $");
 
 #include "opt_copy_symtab.h"
 #include "opt_ddb.h"
@@ -1071,6 +1071,31 @@ begin:
        call    _C_LABEL(multiboot2_post_reloc)
 #endif
 
+       /*
+        * Initialize a temporary GDT (Global Descriptor Table) on the
+        * stack and make the segment registers use it.
+        *
+        * This creates a segment descriptor for the CPU-local segment
+        * and loads %fs with its segment selector to set up addressing
+        * for %fs.  Thus, after this point, CPUVAR(...), curcpu(), and
+        * curlwp will work.
+        *
+        * Later, we will replace this temporary GDT on the stack by a
+        * permanent GDT allocated with uvm_km in gdt_init.
+        *
+        * XXX Intel recommends ensuring the GDT address is aligned on
+        * an 8-byte boundary for performance.  Perhaps not an issue
+        * early at boot, but maybe worth doing?
+        *
+        *      Intel 64 and IA-32 Architectures, Software Developer's
+        *      Manual, Volume 3: System Programming Guide, Order
+        *      Number 325384, April 2022, Sec. 3.5.1 `Segment
+        *      Descriptor Tables', p. 3-15:
+        *
+        *              The base address of the GDT should be aligned
+        *              on an eight-byte boundary to yield the best
+        *              processor performance.
+        */
        subl    $NGDT*8, %esp           /* space for temporary gdt */
        pushl   %esp
        call    _C_LABEL(initgdt)
diff -r 4232ec27ee27 -r 83c56d4eef41 sys/arch/i386/i386/machdep.c
--- a/sys/arch/i386/i386/machdep.c      Sun Jul 16 19:09:07 2023 +0000
+++ b/sys/arch/i386/i386/machdep.c      Sun Jul 16 19:55:43 2023 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: machdep.c,v 1.839 2022/10/26 23:38:07 riastradh Exp $  */
+/*     $NetBSD: machdep.c,v 1.840 2023/07/16 19:55:43 riastradh Exp $  */
 
 /*
  * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009, 2017
@@ -67,7 +67,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.839 2022/10/26 23:38:07 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.840 2023/07/16 19:55:43 riastradh Exp $");
 
 #include "opt_beep.h"
 #include "opt_compat_freebsd.h"
@@ -969,6 +969,41 @@ cpu_init_idt(struct cpu_info *ci)
        lidt(&region);
 }
 
+/*
+ * initgdt(tgdt)
+ *
+ *     Initialize a temporary Global Descriptor Table (GDT) using
+ *     storage space at tgdt.
+ *
+ *     1. Set up segment descriptors for our purposes, including a
+ *        CPU-local segment descriptor pointing at &cpu_info_primary.
+ *
+ *     2. Load the address into the Global Descriptor Table Register.
+ *
+ *     3. Set up segment selectors for all the segment registers using
+ *        it so that %fs-relative addressing works for the CPU-local
+ *        data.
+ *
+ *     After this point, CPUVAR(...), curcpu(), and curlwp will work.
+ *
+ *     Eventually the kernel will switch to a second temporary GDT
+ *     allocated with pmap_bootstrap_valloc in pmap_bootstrap, and
+ *     then to a permanent GDT allocated with uvm_km(9) in gdt_init.
+ *     But the first temporary GDT is needed now to get us going with
+ *     early access to curcpu() and curlwp before we enter kernel
+ *     main.
+ *
+ *     XXX The purpose of each of the segment descriptors should be
+ *     written down somewhere in a single place that can be cross-
+ *     referenced.
+ *
+ *     References:
+ *
+ *     - Intel 64 and IA-32 Architectures Software Developer's Manual,
+ *       Volume 3: System Programming Guide, Order Number 325384,
+ *       April 2022, Sec. 3.5.1 `Segment Descriptor Tables',
+ *       pp. 3-14 through 3-16.
+ */
 void
 initgdt(union descriptor *tgdt)
 {
@@ -1165,7 +1200,15 @@ init386(paddr_t first_avail)
        uvm_lwp_setuarea(&lwp0, lwp0uarea);
 
        cpu_probe(&cpu_info_primary);
+
+       /*
+        * Initialize the no-execute bit on cpu0, if supported.
+        *
+        * Note: The call to cpu_init_msrs for secondary CPUs happens
+        * in cpu_hatch.
+        */
        cpu_init_msrs(&cpu_info_primary, true);
+
 #ifndef XENPV
        cpu_speculation_init(&cpu_info_primary);
 #endif
@@ -1332,7 +1375,25 @@ init386(paddr_t first_avail)
        idt_vec_init_cpu_md(iv, cpu_index(&cpu_info_primary));
        idt = (idt_descriptor_t *)iv->iv_idt;
 
-#ifndef XENPV  
+#ifndef XENPV
+       /*
+        * Switch from the initial temporary GDT that was allocated on
+        * the stack by our caller, start.  That temporary GDT will be
+        * popped off the stack when init386 returns before start calls
+        * main, so we need to use a second temporary GDT allocated in
+        * pmap_bootstrap with pmap_bootstrap_valloc/palloc to make
+        * sure at least the CPU-local data area, used by CPUVAR(...),
+        * curcpu(), and curlwp via %fs-relative addressing, will
+        * continue to work.
+        *
+        * Later, in gdt_init via cpu_startup, we will finally allocate
+        * a permanent GDT with uvm_km(9).
+        *
+        * The content of the second temporary GDT is the same as the
+        * content of the initial GDT, initialized in initgdt, except
+        * for the address of the LDT, because we are also switching
+        * to a new temporary LDT at a new address.
+        */
        tgdt = gdtstore;
        gdtstore = (union descriptor *)gdt_vaddr;
        ldtstore = (union descriptor *)ldt_vaddr;
@@ -1390,10 +1451,22 @@ init386(paddr_t first_avail)
            GSEL(GCODE_SEL, SEL_KPL));
 
 #ifndef XENPV
+       /*
+        * Activate the second temporary GDT, allocated in
+        * pmap_bootstrap with pmap_bootstrap_valloc/palloc, and
+        * initialized with the content of the initial temporary GDT in
+        * initgdt, plus an updated LDT.
+        *
+        * This ensures the %fs-relative addressing for the CPU-local
+        * area used by CPUVAR(...), curcpu(), and curlwp will continue
+        * to work after init386 returns and the initial temporary GDT
+        * is popped off, before we call main and later create a
+        * permanent GDT in gdt_init via cpu_startup.
+        */
        setregion(&region, gdtstore, NGDT * sizeof(gdtstore[0]) - 1);
        lgdt(&region);
 #endif
-       
+



Home | Main Index | Thread Index | Old Index