Source-Changes-HG archive


[src/netbsd-6]: src/sys/arch/amd64 Pull up revisions:



details:   https://anonhg.NetBSD.org/src/rev/c7787aef83ef
branches:  netbsd-6
changeset: 774153:c7787aef83ef
user:      jdc <jdc%NetBSD.org@localhost>
date:      Sun Jun 03 21:45:10 2012 +0000

description:
Pull up revisions:
  src/sys/arch/amd64/include/frameasm.h revision 1.17-1.19
  src/sys/arch/amd64/amd64/vector.S revision 1.40-1.41
  src/sys/arch/amd64/amd64/trap.c revision 1.71
(requested by dsl in ticket #280).

Move all the XEN differences to a single conditional.
Merge the XEN/non-XEN versions of INTRFASTEXIT and
  INTR_RECURSE_HWFRAME by using extra defines.
Split INTRENTRY so that code can insert extra instructions
  inside the user/kernel conditional.
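
The merge follows the same pattern the vector.S hunk below applies to
PRE_TRAP: the XEN/non-XEN difference is isolated in one small define,
and every macro built on top of it is written only once.  A minimal
sketch of the idea in frameasm.h style (the INTRFASTEXIT and
INTR_RECURSE_HWFRAME hunks were truncated from this diff, and
XEN_UNMASK_EVENTS is a hypothetical placeholder):

	#ifdef XEN
	/* Xen: enabling interrupts is a write to the shared-info page,
	 * not an 'sti' (real body elided; helper name is hypothetical). */
	#define	STI(temp)	XEN_UNMASK_EVENTS(temp)
	#else
	#define	STI(temp)	sti
	#endif

With the variant step behind its own define, a macro such as
INTRFASTEXIT needs a single body instead of one per configuration.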

Add a ';' that got deleted in a slight tidyup.

Rejig the way TRAP() and ZTRAP() are defined and add Z/TRAP_NJ() that
  excludes the 'jmp alltraps'.
Use the _NJ versions for trap entries with non-standard code.
Move all the KDTRACE_HOOKS code into a single block inside the
  IDTVEC(trap03) code. This removes a mis-predicted branch from
  every trap when KDTRACE_HOOKS are enabled.
Add a few blank lines; some comments are still needed as well :-)
No functional changes intended.

Let the user of INTRENTRY_L() place a label on the 'swapgs' used
  when faulting from user space.
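
A minimal sketch of the idea in frameasm.h style; the real macro was
truncated from this diff, so the argument names and the register-save
details here are illustrative only:

	#define	INTRENTRY_L(kernel_entry, usertrap_label) \
		subq	$TF_REGSIZE,%rsp	/* room for the frame */ ; \
		testb	$SEL_UPL,TF_CS(%rsp)	/* came from user mode? */ ; \
		je	kernel_entry		/* kernel: no swapgs */ ; \
	usertrap_label: \
		swapgs				/* reload kernel %gs_base */

A caller that passes a label here lets trap() compare a faulting %rip
against the address of that swapgs, so a fault taken on that exact
instruction can be recognized and handled specially.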

If we get a fault setting the user %gs, or on an iret that is returning
to userspace, we must do a 'swapgs' to reload the kernel %gs_base.
Also save the %ds, %es, %fs, %gs selector values in the frame so
they can be restored if we finally return to user space (probably after
an application SIGSEGV handler has fixed the error).
Without this, any such fault leaves the kernel running with the wrong
%gs offset, and it will most likely fault again early in trap().
This repeats until the stack tramples on something important.
The iret change works; an invalid %gs is a little harder to arrange.
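
The detection works by reading the opcode bytes at the faulting %rip,
as the trap.c hunk below does with its switch on
*(uint16_t *)frame->tf_rip.  A self-contained C sketch of just that
test (the struct here is a stand-in with only the fields used; the
real struct trapframe lives in the amd64 machine headers):

	#include <stdint.h>

	struct trapframe {		/* stand-in, heavily trimmed */
		uint64_t tf_rip;
		uint64_t tf_cs;
		uint64_t tf_rflags;
		uint64_t tf_rsp;
		uint64_t tf_ss;
	};

	/*
	 * Nonzero if the faulting instruction was the 'iretq' of the
	 * return-to-user sequence: the bytes 0x48 0xcf (REX.W + iret)
	 * read as the little-endian 16-bit value 0xcf48.
	 */
	static int
	faulted_on_iretq(const struct trapframe *frame)
	{
		return *(const uint16_t *)(uintptr_t)frame->tf_rip == 0xcf48;
	}

The segment-register loads are matched the same way (the 0x848e and
0x9c8e cases in the hunk below), since two opcode bytes are enough to
distinguish the few instructions that can fault on this path.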

Treat traps in kernel mode during the 'return to user' iret sequence
as user faults.
Based heavily on the i386 code, with the correct opcode bytes inserted.
The iret path has been tested; arranging for segment register errors
is harder.
User %fs and %gs (32-bit apps) are loaded much earlier and any errors
there will generate kernel panics - there is probably code that tries
to stop invalid values being set.
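
Whether the interrupted frame belongs to the kernel or to user space
is decided from the privilege level of the saved %cs selector, which
is what the KERNELMODE() test in the hunk below checks.  A hedged C
sketch using the conventional x86 selector encoding (constant names
here are illustrative, not copied from the NetBSD headers):

	#include <stdint.h>

	#define	SEL_RPL_MASK	0x3	/* low two selector bits: RPL */
	#define	SEL_KPL		0	/* ring 0 = kernel */

	/* Nonzero if the saved %cs came from kernel mode. */
	static int
	selector_is_kernel(uint64_t tf_cs)
	{
		return (tf_cs & SEL_RPL_MASK) == SEL_KPL;
	}

If this still reports kernel mode for a fault taken while reloading
the user segment registers, something deeper is wrong, and the code
below gives up via we_re_toast.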

diffstat:

 sys/arch/amd64/amd64/trap.c       |  114 +++++++++++-------
 sys/arch/amd64/amd64/vector.S     |  224 +++++++++++++++++++++++--------------
 sys/arch/amd64/include/frameasm.h |   85 +++++--------
 3 files changed, 238 insertions(+), 185 deletions(-)

diffs (truncated from 638 to 300 lines):

diff -r 80db836ab20e -r c7787aef83ef sys/arch/amd64/amd64/trap.c
--- a/sys/arch/amd64/amd64/trap.c       Sun Jun 03 21:42:51 2012 +0000
+++ b/sys/arch/amd64/amd64/trap.c       Sun Jun 03 21:45:10 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: trap.c,v 1.69 2012/02/04 22:45:40 reinoud Exp $        */
+/*     $NetBSD: trap.c,v 1.69.2.1 2012/06/03 21:45:10 jdc Exp $        */
 
 /*-
  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
@@ -68,7 +68,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.69 2012/02/04 22:45:40 reinoud Exp $");
+__KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.69.2.1 2012/06/03 21:45:10 jdc Exp $");
 
 #include "opt_ddb.h"
 #include "opt_kgdb.h"
@@ -122,6 +122,7 @@
 #endif
 
 void trap(struct trapframe *);
+void trap_return_fault_return(struct trapframe *) __dead;
 
 const char * const trap_type[] = {
        "privileged instruction fault",         /*  0 T_PRIVINFLT */
@@ -200,17 +201,14 @@
        struct lwp *l = curlwp;
        struct proc *p;
        struct pcb *pcb;
-       extern char fusuintrfailure[], kcopy_fault[],
-                   resume_iret[];
+       extern char fusuintrfailure[], kcopy_fault[];
        extern char IDTVEC(oosyscall)[];
        extern char IDTVEC(osyscall)[];
        extern char IDTVEC(syscall32)[];
-#if 0
-       extern char resume_pop_ds[], resume_pop_es[];
+#ifndef XEN
+       struct trapframe *vframe;
 #endif
-       struct trapframe *vframe;
        ksiginfo_t ksi;
-       void *resume;
        void *onfault;
        int type, error;
        uint64_t cr2;
@@ -312,50 +310,76 @@
 
                /*
                 * Check for failure during return to user mode.
-                *
-                * XXXfvdl check for rex prefix?
-                *
-                * We do this by looking at the instruction we faulted on.  The
-                * specific instructions we recognize only happen when
-                * returning from a trap, syscall, or interrupt.
+                * This can happen loading invalid values into the segment
+                * registers, or during the 'iret' itself.
                 *
-                * XXX
-                * The heuristic used here will currently fail for the case of
-                * one of the 2 pop instructions faulting when returning from a
-                * a fast interrupt.  This should not be possible.  It can be
-                * fixed by rearranging the trap frame so that the stack format
-                * at this point is the same as on exit from a `slow'
-                * interrupt.
+                * We do this by looking at the instruction we faulted on.
+                * The specific instructions we recognize only happen when
+                * returning from a trap, syscall, or interrupt.
+                */
+
+kernelfault:
+#ifdef XEN
+               /*
+                * XXX: there has to be an equivalent 'problem'
+                * but I (dsl) don't know exactly what happens!
+                * For now panic the kernel.
                 */
-               switch (*(u_char *)frame->tf_rip) {
-               case 0xcf:      /* iret */
-                       vframe = (void *)((uint64_t)&frame->tf_rsp - 44);
-                       resume = resume_iret;
+               goto we_re_toast;
+#else
+               KSI_INIT_TRAP(&ksi);
+               ksi.ksi_signo = SIGSEGV;
+               ksi.ksi_code = SEGV_ACCERR;
+               ksi.ksi_trap = type;
+
+               /* Get %rsp value before fault - there may be a pad word
+                * below the trap frame. */
+               vframe = (void *)frame->tf_rsp;
+               switch (*(uint16_t *)frame->tf_rip) {
+               case 0xcf48:    /* iretq */
+                       /*
+                        * The 'iretq' instruction faulted, so we have the
+                        * 'user' registers saved after the kernel
+                        * %rip:%cs:%fl:%rsp:%ss of the iret, and below that
+                        * the user %rip:%cs:%fl:%rsp:%ss the 'iret' was
+                        * processing.
+                        * We must copy the user register back over the
+                        * kernel fault frame to generate a normal stack
+                        * frame (eg for sending a SIGSEGV).
+                        */
+                       vframe = (void *)((char *)vframe
+                           - offsetof(struct trapframe, tf_rip));
+                       memmove(vframe, frame,
+                           offsetof(struct trapframe, tf_rip));
+                       /* Set the faulting address to the user %rip */
+                       ksi.ksi_addr = (void *)vframe->tf_rip;
                        break;
-/*
- * XXXfvdl these are illegal in long mode (not in compat mode, though)
- * and we do not take back the descriptors from the signal context anyway,
- * but may do so later for USER_LDT, in which case we need to intercept
- * other instructions (movl %eax, %Xs).
- */
-#if 0
-               case 0x1f:      /* popl %ds */
-                       vframe = (void *)((uint64_t)&frame->tf_rsp - 4);
-                       resume = resume_pop_ds;
+               case 0x848e:    /* mov 0xa8(%rsp),%es (8e 84 24 a8 00 00 00) */
+               case 0x9c8e:    /* mov 0xb0(%rsp),%ds (8e 9c 24 b0 00 00 00) */
+                       /*
+                        * We faulted loading one of the user segment registers.
+                        * The stack frame containing the user registers is
+                        * still valid and pointed to by tf_rsp.
+                        * Maybe we should check that the iretq follows.
+                        */
+                       if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
+                               goto we_re_toast;
+                       /* There is no valid address for the fault */
                        break;
-               case 0x07:      /* popl %es */
-                       vframe = (void *)((uint64_t)&frame->tf_rsp - 0);
-                       resume = resume_pop_es;
-                       break;
-#endif
+
                default:
                        goto we_re_toast;
                }
-               if (KERNELMODE(vframe->tf_cs, vframe->tf_rflags))
-                       goto we_re_toast;
 
-               frame->tf_rip = (uint64_t)resume;
-               return;
+               /* XXX: worry about on-stack trampolines for nested
+                * handlers?? */
+               /* Save outer frame for any signal return */
+               l->l_md.md_regs = vframe;
+               (*p->p_emul->e_trapsignal)(l, &ksi);
+               /* Return to user by reloading the user frame */
+               trap_return_fault_return(vframe);
+               /* NOTREACHED */
+#endif
 
        case T_PROTFLT|T_USER:          /* protection fault */
        case T_TSSFLT|T_USER:
@@ -613,7 +637,7 @@
                                goto copyfault;
                        printf("uvm_fault(%p, 0x%lx, %d) -> %x\n",
                            map, va, ftype, error);
-                       goto we_re_toast;
+                       goto kernelfault;
                }
                if (error == ENOMEM) {
                        ksi.ksi_signo = SIGKILL;
diff -r 80db836ab20e -r c7787aef83ef sys/arch/amd64/amd64/vector.S
--- a/sys/arch/amd64/amd64/vector.S     Sun Jun 03 21:42:51 2012 +0000
+++ b/sys/arch/amd64/amd64/vector.S     Sun Jun 03 21:45:10 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: vector.S,v 1.38.8.1 2012/04/23 16:34:16 riz Exp $      */
+/*     $NetBSD: vector.S,v 1.38.8.2 2012/06/03 21:45:10 jdc Exp $      */
 
 /*-
  * Copyright (c) 1998, 2007, 2008 The NetBSD Foundation, Inc.
@@ -100,40 +100,25 @@
 
 /*****************************************************************************/
 
-#ifndef        XEN
-#define PRE_TRAP
-#define        TRAP(a)         pushq $(a) ; jmp _C_LABEL(alltraps)
-#define        ZTRAP(a)        pushq $0 ; TRAP(a)
+#ifdef XEN
+#define        PRE_TRAP        movq (%rsp),%rcx ; movq 8(%rsp),%r11 ; addq $0x10,%rsp 
 #else
-#define        PRE_TRAP        movq (%rsp),%rcx ; movq 8(%rsp),%r11 ; addq $0x10,%rsp 
-#define        POST_TRAP(a)    pushq $(a) ; jmp _C_LABEL(alltraps)
-#define        TRAP(a)         PRE_TRAP ; POST_TRAP(a)
-#define ZTRAP(a)       PRE_TRAP ; pushq $0 ; POST_TRAP(a)
+#define        PRE_TRAP
 #endif
 
-#define        BPTTRAP(a)      ZTRAP(a)
+#define        TRAP_NJ(a)      PRE_TRAP ; pushq $(a)
+#define        ZTRAP_NJ(a)     PRE_TRAP ; pushq $0 ; pushq $(a)
+#define        TRAP(a)         TRAP_NJ(a) ; jmp _C_LABEL(alltraps)
+#define        ZTRAP(a)        ZTRAP_NJ(a) ; jmp _C_LABEL(alltraps)
 
-#ifdef KDTRACE_HOOKS
-       .bss
-       .globl  dtrace_invop_jump_addr
-       .align  8
-       .type   dtrace_invop_jump_addr, @object
-       .size   dtrace_invop_jump_addr, 8
-dtrace_invop_jump_addr:
-       .zero   8
-       .globl  dtrace_invop_calltrap_addr
-       .align  8
-       .type   dtrace_invop_calltrap_addr, @object
-       .size   dtrace_invop_calltrap_addr, 8
-dtrace_invop_calltrap_addr:
-       .zero   8
-#endif
        .text
 
 IDTVEC(trap00)
        ZTRAP(T_DIVIDE)
+
 IDTVEC(trap01)
-       BPTTRAP(T_TRCTRAP)
+       ZTRAP(T_TRCTRAP)
+
 IDTVEC(trap02)
 #if defined(XEN)
        ZTRAP(T_NMI)
@@ -167,18 +152,61 @@
        addq    $TF_REGSIZE+16,%rsp
        iretq
 #endif /* defined(XEN) */
+
 IDTVEC(trap03)
-       BPTTRAP(T_BPTFLT)
+#ifndef KDTRACE_HOOKS
+       ZTRAP(T_BPTFLT)
+#else
+       ZTRAP_NJ(T_BPTFLT)
+       INTRENTRY
+       STI(si)
+       /*
+        * DTrace Function Boundary Trace (fbt) probes are triggered
+        * by int3 (0xcc).
+        */
+       /* Check if there is no DTrace hook registered. */
+       cmpq    $0,dtrace_invop_jump_addr
+       je      calltrap
+
+       /*
+        * Set our jump address for the jump back in the event that
+        * the exception wasn't caused by DTrace at all.
+        */
+       /* XXX: This doesn't look right for SMP - unless it is a
+        * constant - so why set it every time. (dsl) */
+       movq    $calltrap, dtrace_invop_calltrap_addr(%rip)
+
+       /* Jump to the code hooked in by DTrace. */
+       movq    dtrace_invop_jump_addr, %rax
+       jmpq    *dtrace_invop_jump_addr
+
+       .bss
+       .globl  dtrace_invop_jump_addr
+       .align  8
+       .type   dtrace_invop_jump_addr, @object
+       .size   dtrace_invop_jump_addr, 8
+dtrace_invop_jump_addr:
+       .zero   8
+       .globl  dtrace_invop_calltrap_addr
+       .align  8
+       .type   dtrace_invop_calltrap_addr, @object
+       .size   dtrace_invop_calltrap_addr, 8
+dtrace_invop_calltrap_addr:
+       .zero   8
+       .text
+#endif
+
 IDTVEC(trap04)
        ZTRAP(T_OFLOW)
+
 IDTVEC(trap05)
        ZTRAP(T_BOUND)
+
 IDTVEC(trap06)
        ZTRAP(T_PRIVINFLT)
+
 IDTVEC(trap07)
-       PRE_TRAP;
-       pushq   $0                      # dummy error code
-       pushq   $T_ASTFLT
+       ZTRAP_NJ(T_ASTFLT)
        INTRENTRY
 #ifdef DIAGNOSTIC
        movl    CPUVAR(ILEVEL),%ebx
@@ -186,34 +214,78 @@
        movq    CPUVAR(SELF),%rdi
        call    _C_LABEL(fpudna)
        jmp     .Lalltraps_checkusr
+


