Port-i386 archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

netbsd-5 branch kernel patch: Please test!



There is one additional change (from -current) that's being considered for inclusion into NetBSD 5.1, but as it doesn't apply 100% cleanly (though conflicts are minimal), I'd like more experienced eyes on it before including it.

I also ask that people please test this patch on a NetBSD 5.1_RC1 i386 kernel and put it through its paces, letting me know if any new problems crop up.

These are the changes implemented by the attached patch:

http://mail-index.netbsd.org/source-changes/2010/01/10/msg005240.html
http://mail-index.netbsd.org/source-changes/2010/01/10/msg005241.html
http://mail-index.netbsd.org/source-changes/2010/01/17/msg005529.html

Some initial tests indicate things are working well, but more input is gratefully accepted.

Thanks!
+j

Index: sys/arch/i386/i386/trap.c
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/trap.c,v
retrieving revision 1.241.4.1
diff -u -r1.241.4.1 trap.c
--- sys/arch/i386/i386/trap.c   27 Mar 2009 17:25:15 -0000      1.241.4.1
+++ sys/arch/i386/i386/trap.c   17 May 2010 19:07:40 -0000
@@ -131,6 +131,7 @@
 static inline int xmm_si_code(struct lwp *);
 void trap(struct trapframe *);
 void trap_tss(struct i386tss *, int, int);
+void trap_return_fault_return(struct trapframe *) __dead;
 
 #ifdef KVM86
 #include <machine/kvm86.h>
@@ -276,7 +277,7 @@
        struct proc *p;
        int type = frame->tf_trapno;
        struct pcb *pcb;
-       extern char fusubail[], kcopy_fault[], trapreturn[], IDTVEC(osyscall)[];
+       extern char fusubail[], kcopy_fault[], IDTVEC(osyscall)[];
        struct trapframe *vframe;
        ksiginfo_t ksi;
        void *onfault;
@@ -392,67 +393,76 @@
 
                /*
                 * Check for failure during return to user mode.
+                * This can happen loading invalid values into the segment
+                * registers, or during the 'iret' itself.
                 *
                 * We do this by looking at the instruction we faulted on.
                 * The specific instructions we recognize only happen when
                 * returning from a trap, syscall, or interrupt.
-                *
-                * At this point, there are (at least) two trap frames on
-                * the kernel stack; we presume here that we faulted while
-                * loading our registers out of the outer one.
                 */
+
+               KSI_INIT_TRAP(&ksi);
+               ksi.ksi_signo = SIGSEGV;
+               ksi.ksi_code = SEGV_ACCERR;
+               ksi.ksi_trap = type;
+
                switch (*(u_char *)frame->tf_eip) {
                case 0xcf:      /* iret */
-                       vframe = (void *)((int)&frame->tf_esp -
+                       /*
+                        * The 'iret' instruction faulted, so we have the
+                        * 'user' registers saved after the kernel %eip:%cs:%fl
+                        * of the 'iret' and below that the user %eip:%cs:%fl
+                        * the 'iret' was processing.
+                        * We must delete the 3 words of kernel return address
+                        * from the stack to generate a normal stack frame
+                        * (eg for sending a SIGSEGV).
+                        */
+                       vframe = (void *)((int *)frame + 3);
+                       if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
+                               goto we_re_toast;
+                       memmove(vframe, frame,
                            offsetof(struct trapframe, tf_eip));
+                       /* Set the faulting address to the user %eip */
+                       ksi.ksi_addr = (void *)vframe->tf_eip;
                        break;
                case 0x8e:
                        switch (*(uint32_t *)frame->tf_eip) {
-                       case 0x0c245c8e:        /* movl 0xc(%esp,1),%ds */
-                       case 0x0824448e:        /* movl 0x8(%esp,1),%es */
-                       case 0x0424648e:        /* movl 0x4(%esp,1),%fs */
-                       case 0x00246c8e:        /* movl 0x0(%esp,1),%gs */
+                       case 0x8e242c8e:        /* mov (%esp,%gs), then */
+                       case 0x0424648e:        /* mov 0x4(%esp),%fs */
+                       case 0x0824448e:        /* mov 0x8(%esp),%es */
+                       case 0x0c245c8e:        /* mov 0xc(%esp),%ds */
                                break;
                        default:
                                goto we_re_toast;
                        }
-                       vframe = (void *)(int)&frame->tf_esp;
+                       /*
+                        * We faulted loading one of the user segment registers.
+                        * The stack frame containing the user registers is
+                        * still valid and is just below the %eip:%cs:%fl of
+                        * the kernel fault frame.
+                        */
+                       vframe = (void *)(&frame->tf_eflags + 1);
+                       if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
+                               goto we_re_toast;
+                       /* There is no valid address for the fault */
                        break;
                default:
                        goto we_re_toast;
                }
-               if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
-                       goto we_re_toast;
-
                /*
-                * Arrange to signal the thread, which will reset its
-                * registers in the outer frame.  This also allows us to
-                * capture the invalid register state in sigcontext,
-                * packaged up with the signal delivery.  We restart
-                * on return at 'trapreturn', acting as if nothing
-                * happened, restarting the return to user with our new
-                * set of registers.
-                *
-                * Clear PSL_NT.  It can be set by userland because setting
-                * it isn't a privileged operation.
-                *
-                * Set PSL_I.  Otherwise, if SIGSEGV is ignored, we'll
-                * continue to generate traps infinitely with
-                * interrupts disabled.
+                * We might have faulted trying to execute the
+                * trampoline for a local (nested) signal handler.
+                * Only generate SIGSEGV if the user %cs isn't changed.
+                * (This is only strictly necessary in the 'iret' case.)
                 */
-               frame->tf_ds = GSEL(GDATA_SEL, SEL_KPL);
-               frame->tf_es = GSEL(GDATA_SEL, SEL_KPL);
-               frame->tf_gs = GSEL(GDATA_SEL, SEL_KPL);
-               frame->tf_fs = GSEL(GCPU_SEL, SEL_KPL);
-               frame->tf_eip = (uintptr_t)trapreturn;
-               frame->tf_eflags = (frame->tf_eflags & ~PSL_NT) | PSL_I;
-               KSI_INIT_TRAP(&ksi);
-               ksi.ksi_signo = SIGSEGV;
-               ksi.ksi_addr = (void *)rcr2();
-               ksi.ksi_code = SEGV_ACCERR;
-               ksi.ksi_trap = type & ~T_USER;
-               (*p->p_emul->e_trapsignal)(l, &ksi);
-               return;
+               if (!pmap_exec_fixup(&p->p_vmspace->vm_map, vframe, pcb)) {
+                       /* Save outer frame for any signal return */
+                       l->l_md.md_regs = vframe;
+                       (*p->p_emul->e_trapsignal)(l, &ksi);
+               }
+               /* Return to user by reloading the user frame */
+               trap_return_fault_return(vframe);
+               /* NOTREACHED */
 
        case T_PROTFLT|T_USER:          /* protection fault */
 #ifdef VM86
@@ -490,6 +500,23 @@
                case T_ALIGNFLT|T_USER:
                        ksi.ksi_code = BUS_ADRALN;
                        break;
+               case T_PROTFLT|T_USER:
+#ifdef VM86
+                       if (frame->tf_eflags & PSL_VM) {
+                               vm86_gpfault(l, type & ~T_USER);
+                               goto out;
+                       }
+#endif
+                       /*
+                        * If pmap_exec_fixup does something,
+                        * let's retry the trap.
+                        */
+                       if (pmap_exec_fixup(&p->p_vmspace->vm_map, frame, pcb)){
+                               goto out;
+                       }
+                       ksi.ksi_signo = SIGSEGV;
+                       ksi.ksi_code = SEGV_ACCERR;
+                       break;
                default:
                        KASSERT(1);
                        break;
Index: sys/arch/i386/i386/vector.S
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/vector.S,v
retrieving revision 1.42.6.2
diff -u -r1.42.6.2 vector.S
--- sys/arch/i386/i386/vector.S 4 Apr 2009 17:39:09 -0000       1.42.6.2
+++ sys/arch/i386/i386/vector.S 17 May 2010 19:07:40 -0000
@@ -1083,6 +1083,18 @@
        iret
        jmp     1b
 
+/*
+ * trap() calls here when it detects a fault in INTRFASTEXIT (loading the
+ * segment registers or during the iret itself).
+ * The address of the (possibly reconstructed) user trap frame is
+ * passed as an argument.
+ * Typically the code will have raised a SIGSEGV which will be actioned
+ * by the code below.
+ */
+_C_LABEL(trap_return_fault_return):    .globl  trap_return_fault_return
+       mov     4(%esp),%esp    /* frame for user return */
+       jmp     _C_LABEL(trapreturn)
+
 /* LINTSTUB: Ignore */
 NENTRY(alltraps)
        INTRENTRY


Home | Main Index | Thread Index | Old Index