tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Integration of LWP private data into ucontext



Hi all,
attached is a work-in-progress patch to include the LWP private data
pointer in ucontext. Platforms that don't require special pmap magic
define __lwp_getprivate_fast and __HAVE___LWP_GETPRIVATE_FAST in
machine/types.h; this is used by libpthread and will later be used by
the TLS support code in rtld and libc.

amd64, i386 and sh3 are tested and committable. This includes the change
to libpthread, which fixes the stack-related issues on these platforms.

The mcontext part (everything except __lwp_getprivate_fast) needs another
pass on the platforms without an existing entry in mcontext to ensure
that the size doesn't change, but is otherwise safe to commit. I don't
plan to hook up __lwp_getprivate_fast or changes like cpu_lwp_setprivate
at this point. That needs careful checking and testing by someone with
the hardware. In many cases, the thread register is currently not
updated on context switch.

Someone with ARM and M68K knowledge has to decide if SMP support is
desirable and, if so, how to teach the pmap about a CPU-specific page.

The only platform left out is VAX since it doesn't have any space in the
mcontext and I don't know if any existing entry can be abused/reused.

Joerg
Index: src/lib/libc/arch/alpha/gen/_lwp.c
===================================================================
--- src/lib/libc/arch/alpha/gen/_lwp.c
+++ src/lib/libc/arch/alpha/gen/_lwp.c
@@ -58,6 +58,9 @@
        gr[_REG_T12] = (unsigned long) start;
        gr[_REG_RA] = (unsigned long) _lwp_exit;
        gr[_REG_A0] = (unsigned long) arg;
        gr[_REG_SP] = ((unsigned long) (stack_base + stack_size)) & ~0x7;
        gr[_REG_S6] = 0;
+       gr[_REG_UNIQUE] = (unsigned long)private;
+
+       u->uc_flags |= _UC_UNIQUE;
 }

Index: src/lib/libc/arch/arm/gen/_lwp.c
===================================================================
--- src/lib/libc/arch/arm/gen/_lwp.c
+++ src/lib/libc/arch/arm/gen/_lwp.c
@@ -66,6 +66,8 @@
 
        u->uc_mcontext.__gregs[_REG_R0] = (__greg_t) arg;
        u->uc_mcontext.__gregs[_REG_SP] = ((__greg_t) sp) & ~7;
        u->uc_mcontext.__gregs[_REG_LR] = (__greg_t) _lwp_exit;
        u->uc_mcontext.__gregs[_REG_PC] = (__greg_t) start;
+       u->uc_mcontext._mc_tlsbase = (uintptr_t)private;
+       u->uc_flags |= _UC_TLSBASE;
 }

Index: src/lib/libc/arch/hppa/gen/_lwp.c
===================================================================
--- src/lib/libc/arch/hppa/gen/_lwp.c
+++ src/lib/libc/arch/hppa/gen/_lwp.c
@@ -67,6 +67,7 @@
        gr[_REG_PCOQH] = fp | HPPA_PC_PRIV_USER;
        gr[_REG_PCOQT] = (fp + 4) | HPPA_PC_PRIV_USER;
        gr[_REG_RP] = (__greg_t) _lwp_exit;
        gr[_REG_ARG0] = (__greg_t) arg;
        gr[_REG_SP] = (__greg_t) sp;
+       gr[_REG_CR27] = (__greg_t) private;
 }

Index: src/lib/libc/arch/i386/gen/_lwp.c
===================================================================
--- src/lib/libc/arch/i386/gen/_lwp.c
+++ src/lib/libc/arch/i386/gen/_lwp.c
@@ -64,7 +64,8 @@
        *--sp = (void *) _lwp_exit;
        
        /* LINTED uintptr_t is safe */
        u->uc_mcontext.__gregs[_REG_UESP] = (uintptr_t) sp;
 
-       /* LINTED private is currently unused */
+       u->uc_mcontext._mc_tlsbase = (uintptr_t)private;
+       u->uc_flags |= _UC_TLSBASE;
 }

Index: src/lib/libc/arch/m68k/gen/_lwp.c
===================================================================
--- src/lib/libc/arch/m68k/gen/_lwp.c
+++ src/lib/libc/arch/m68k/gen/_lwp.c
@@ -58,6 +58,8 @@
        
        *--sp = arg;
        *--sp = (void *) _lwp_exit;
 
        u->uc_mcontext.__gregs[_REG_A7] = (int) sp;
+       u->uc_mcontext._mc_tlsbase = (uintptr_t)private;
+       u->uc_flags |= _UC_TLSBASE;
 }

Index: src/lib/libc/arch/mips/gen/_lwp.c
===================================================================
--- src/lib/libc/arch/mips/gen/_lwp.c
+++ src/lib/libc/arch/mips/gen/_lwp.c
@@ -59,6 +59,8 @@
        gr[_REG_EPC] = (unsigned long) start;
        gr[_REG_T9] = (unsigned long) start; /* required for .abicalls */
        gr[_REG_RA] = (unsigned long) _lwp_exit;
        gr[_REG_A0] = (unsigned long) arg;
        gr[_REG_SP] = (unsigned long) sp;
+       u->uc_mcontext._mc_tlsbase = (uintptr_t)private;
+       u->uc_flags |= _UC_TLSBASE;
 }

Index: src/lib/libc/arch/powerpc/gen/_lwp.c
===================================================================
--- src/lib/libc/arch/powerpc/gen/_lwp.c
+++ src/lib/libc/arch/powerpc/gen/_lwp.c
@@ -62,6 +62,7 @@
 
        u->uc_mcontext.__gregs[3] = (int) arg;          /* arg1 */
        u->uc_mcontext.__gregs[1] = ((int) sp) - 12;    /* stack */
        u->uc_mcontext.__gregs[33] = (int) _lwp_exit;   /* LR */
        u->uc_mcontext.__gregs[34] = (int) start;       /* PC */
+       u->uc_mcontext.__gregs[_REG_R2] = (__greg_t) private;
 }

Index: src/lib/libc/arch/sh3/gen/_lwp.c
===================================================================
--- src/lib/libc/arch/sh3/gen/_lwp.c
+++ src/lib/libc/arch/sh3/gen/_lwp.c
@@ -62,6 +62,7 @@
 
        u->uc_mcontext.__gregs[_REG_R4] = (__greg_t) arg;
        u->uc_mcontext.__gregs[_REG_SP] = ((__greg_t) sp) & ~3;
        u->uc_mcontext.__gregs[_REG_PR] = (__greg_t) _lwp_exit;
        u->uc_mcontext.__gregs[_REG_PC] = (__greg_t) start;
+       u->uc_mcontext.__gregs[_REG_GBR] = (__greg_t) private;
 }

Index: src/lib/libc/arch/sparc/gen/_lwp.c
===================================================================
--- src/lib/libc/arch/sparc/gen/_lwp.c
+++ src/lib/libc/arch/sparc/gen/_lwp.c
@@ -63,11 +63,12 @@
        gr[_REG_PC] = (ulong) start;
        gr[_REG_nPC] = (ulong) start + 4;
        gr[_REG_O0] = (ulong)arg;
        gr[_REG_O6] = (ulong)sp;
        gr[_REG_O7] = (ulong)_lwp_exit - 8;
+       gr[_REG_G7] = (ulong)private;
 
        /* XXX: uwe: why do we need this? */
        /* create loopback in the window save area on the stack? */
        sp[8+6] = (ulong)sp;            /* %i6 */
        sp[8+7] = (ulong)_lwp_exit - 8; /* %i7 */
 }

Index: src/lib/libc/arch/sparc64/gen/_lwp.c
===================================================================
--- src/lib/libc/arch/sparc64/gen/_lwp.c
+++ src/lib/libc/arch/sparc64/gen/_lwp.c
@@ -68,6 +68,7 @@
        gr[_REG_nPC] = (ulong) start + 4;
 
        gr[_REG_O0] = (ulong) arg;
        gr[_REG_O6] = (ulong) sp;
        gr[_REG_O7] = (ulong)_lwp_exit - 8;
+       gr[_REG_G7] = (ulong)private;
 }

Index: src/lib/libc/arch/x86_64/gen/_lwp.c
===================================================================
--- src/lib/libc/arch/x86_64/gen/_lwp.c
+++ src/lib/libc/arch/x86_64/gen/_lwp.c
@@ -64,7 +64,8 @@
        *--sp = (void *) _lwp_exit;
        
        /* LINTED uintptr_t is safe */
        gr[_REG_URSP] = (uintptr_t) sp;
 
-       /* LINTED private is currently unused */
+       u->uc_mcontext._mc_tlsbase = (uintptr_t)private;
+       u->uc_flags |= _UC_TLSBASE;
 }

Index: src/lib/libpthread/arch/i386/pthread_md.h
===================================================================
--- src/lib/libpthread/arch/i386/pthread_md.h
+++ src/lib/libpthread/arch/i386/pthread_md.h
@@ -73,29 +73,13 @@
                (ucp)->uc_mcontext.__gregs[_REG_EFL] =                  \
                    ucur.uc_mcontext.__gregs[_REG_EFL];                 \
        } while (/*CONSTCOND*/0);
 
 #define        pthread__smt_pause()    __asm __volatile("rep; nop" ::: 
"memory")
-/*     #define PTHREAD__HAVE_THREADREG */
 
 /* Don't need additional memory barriers. */
 #define        PTHREAD__ATOMIC_IS_MEMBAR
-
-static inline pthread_t
-#ifdef __GNUC__
-__attribute__ ((__const__))
-#endif
-pthread__threadreg_get(void)
-{
-       pthread_t self;
-
-       __asm volatile("movl %%gs:0, %0"
-               : "=r" (self)
-               :);
-
-       return self;
-}
 
 static inline void *
 _atomic_cas_ptr(volatile void *ptr, void *old, void *new)
 {
        volatile uintptr_t *cast = ptr;

Index: src/lib/libpthread/pthread_int.h
===================================================================
--- src/lib/libpthread/pthread_int.h
+++ src/lib/libpthread/pthread_int.h
@@ -248,12 +248,16 @@
        (ucp)->uc_flags = _UC_CPU | _UC_STACK;                          \
        _INITCONTEXT_U_MD(ucp)                                          \
        } while (/*CONSTCOND*/0)
 
 
-#ifdef PTHREAD__HAVE_THREADREG
-#define        pthread__self()         pthread__threadreg_get()
+#ifdef __HAVE___LWP_GETPRIVATE_FAST
+static inline pthread_t __constfunc
+pthread__self(void)
+{
+       return (pthread_t)__lwp_getprivate_fast();
+}
 #else
 /* Stack location of pointer to a particular thread */
 extern vaddr_t pthread__mainbase;
 extern vaddr_t pthread__mainstruct;
 static inline pthread_t

Index: src/sys/arch/alpha/alpha/machdep.c
===================================================================
--- src/sys/arch/alpha/alpha/machdep.c
+++ src/sys/arch/alpha/alpha/machdep.c
@@ -1926,16 +1926,12 @@
                else
                        pcb->pcb_hw.apcb_usp = gr[_REG_SP];
                frame->tf_regs[FRAME_PC] = gr[_REG_PC];
                frame->tf_regs[FRAME_PS] = gr[_REG_PS];
        }
-       if (flags & _UC_UNIQUE) {
-               if (l == curlwp)
-                       alpha_pal_wrunique(gr[_REG_UNIQUE]);
-               else
-                       pcb->pcb_hw.apcb_unique = gr[_REG_UNIQUE];
-       }
+       if (flags & _UC_UNIQUE)
+               lwp_setprivate(l, (void *)(uintptr_t)gr[_REG_UNIQUE]);
        /* Restore floating point register context, if any. */
        if (flags & _UC_FPU) {
                /* If we have an FP register context, get rid of it. */
                if (pcb->pcb_fpcpu != NULL)
                        fpusave_proc(l, 0);

Index: src/sys/arch/alpha/alpha/sys_machdep.c
===================================================================
--- src/sys/arch/alpha/alpha/sys_machdep.c
+++ src/sys/arch/alpha/alpha/sys_machdep.c
@@ -236,7 +236,9 @@
 {
        struct pcb *pcb;
 
        pcb = lwp_getpcb(l);
        pcb->pcb_hw.apcb_unique = (unsigned long)addr;
+       if (l == curlwp)
+               alpha_pal_wrunique(pcb->pcb_hw.apcb_unique);
        return 0;
 }

Index: src/sys/arch/alpha/include/mcontext.h
===================================================================
--- src/sys/arch/alpha/include/mcontext.h
+++ src/sys/arch/alpha/include/mcontext.h
@@ -98,7 +98,19 @@
 #define _UC_MACHINE_SP(uc)     ((uc)->uc_mcontext.__gregs[_REG_SP])
 #define _UC_MACHINE_PC(uc)     ((uc)->uc_mcontext.__gregs[_REG_PC])
 #define _UC_MACHINE_INTRV(uc)  ((uc)->uc_mcontext.__gregs[_REG_V0])
 
 #define        _UC_MACHINE_SET_PC(uc, pc)      _UC_MACHINE_PC(uc) = (pc)
+
+static inline void *
+__lwp_getprivate_fast(void)
+{
+       register void *__tmp __asm("$0");
+
+       __asm volatile("call_pal %1 # PAL_rdunique"
+               : "=r" (__tmp)
+               : "i" (0x009e /* PAL_rdunique */));
+
+       return __tmp;
+}
 
 #endif /* !_ALPHA_MCONTEXT_H_ */

Index: src/sys/arch/alpha/include/types.h
===================================================================
--- src/sys/arch/alpha/include/types.h
+++ src/sys/arch/alpha/include/types.h
@@ -74,11 +74,12 @@
 #define        __HAVE_SYSCALL_INTERN
 #define        __HAVE_MINIMAL_EMUL
 #define        __HAVE_AST_PERPROC
 #define        __HAVE_ATOMIC64_OPS
 #define        __HAVE_CPU_LWP_SETPRIVATE
+#define        __HAVE___LWP_GETPRIVATE_FAST
 
 #if defined(_KERNEL)
 #define        __HAVE_RAS
 #endif
 
 #endif /* _MACHTYPES_H_ */

Index: src/sys/arch/amd64/amd64/machdep.c
===================================================================
--- src/sys/arch/amd64/amd64/machdep.c
+++ src/sys/arch/amd64/amd64/machdep.c
@@ -1605,10 +1605,13 @@
            (void *) mcp->__gregs[_REG_RIP])) != -1)
                mcp->__gregs[_REG_RIP] = ras_rip;
 
        *flags |= _UC_CPU;
 
+       mcp->_mc_tlsbase = (uintptr_t)l->l_private;;
+       *flags |= _UC_TLSBASE;
+
        if ((l->l_md.md_flags & MDP_USEDFPU) != 0) {
                struct pcb *pcb = lwp_getpcb(l);
 
                if (pcb->pcb_fpcpu) {
                        fpusave_lwp(l, true);
@@ -1671,10 +1674,13 @@
        if ((flags & _UC_FPU) != 0) {
                memcpy(&pcb->pcb_savefpu.fp_fxsave, mcp->__fpregs,
                    sizeof (mcp->__fpregs));
                l->l_md.md_flags |= MDP_USEDFPU;
        }
+
+       if ((flags & _UC_TLSBASE) != 0)
+               lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
 
        mutex_enter(p->p_lock);
        if (flags & _UC_SETSTACK)
                l->l_sigstk.ss_flags |= SS_ONSTACK;
        if (flags & _UC_CLRSTACK)

Index: src/sys/arch/amd64/amd64/netbsd32_machdep.c
===================================================================
--- src/sys/arch/amd64/amd64/netbsd32_machdep.c
+++ src/sys/arch/amd64/amd64/netbsd32_machdep.c
@@ -155,11 +155,11 @@
 
        tf = l->l_md.md_regs;
        tf->tf_ds = LSEL(LUDATA32_SEL, SEL_UPL);
        tf->tf_es = LSEL(LUDATA32_SEL, SEL_UPL);
        cpu_fsgs_zero(l);
-       cpu_fsgs_reload(l, tf->tf_ds, tf->tf_ds);
+       cpu_fsgs_reload(l, tf->tf_ds, tf->tf_es);
        tf->tf_rdi = 0;
        tf->tf_rsi = 0;
        tf->tf_rbp = 0;
        tf->tf_rbx = (uint64_t)p->p_psstr;
        tf->tf_rdx = 0;
@@ -857,10 +857,13 @@
                tf->tf_cs     = gr[_REG32_CS];
                tf->tf_rsp    = gr[_REG32_UESP];
                tf->tf_ss     = gr[_REG32_SS];
        }
 
+       if ((flags & _UC_TLSBASE) != 0)
+               lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
+
        /* Restore floating point register context, if any. */
        if ((flags & _UC_FPU) != 0) {
                struct pcb *pcb = lwp_getpcb(l);
 
                /*
@@ -868,11 +871,11 @@
                 */
                if (pcb->pcb_fpcpu != NULL) {
                        fpusave_lwp(l, false);
                }
                memcpy(&pcb->pcb_savefpu.fp_fxsave, &mcp->__fpregs,
-                   sizeof (mcp->__fpregs));
+                   sizeof (pcb->pcb_savefpu.fp_fxsave));
                /* If not set already. */
                l->l_md.md_flags |= MDP_USEDFPU;
        }
 
        mutex_enter(p->p_lock);
@@ -917,19 +920,22 @@
            (void *) (uintptr_t)gr[_REG32_EIP])) != -1)
                gr[_REG32_EIP] = ras_eip;
 
        *flags |= _UC_CPU;
 
+       mcp->_mc_tlsbase = (uint32_t)(uintptr_t)l->l_private;
+       *flags |= _UC_TLSBASE;
+
        /* Save floating point register context, if any. */
        if ((l->l_md.md_flags & MDP_USEDFPU) != 0) {
                struct pcb *pcb = lwp_getpcb(l);
 
                if (pcb->pcb_fpcpu) {
                        fpusave_lwp(l, true);
                }
                memcpy(&mcp->__fpregs, &pcb->pcb_savefpu.fp_fxsave,
-                   sizeof (mcp->__fpregs));
+                   sizeof (pcb->pcb_savefpu.fp_fxsave));
                *flags |= _UC_FPU;
        }
 }
 
 void

Index: src/sys/arch/amd64/include/mcontext.h
===================================================================
--- src/sys/arch/amd64/include/mcontext.h
+++ src/sys/arch/amd64/include/mcontext.h
@@ -60,11 +60,11 @@
  * within ucontext_t.
  */
 
 typedef struct {
        __gregset_t     __gregs;
-       long            __pad;
+       __greg_t        _mc_tlsbase;
        __fpregset_t    __fpregs;
 } mcontext_t;
 
 #define _UC_UCONTEXT_ALIGN     (~0xf)
 
@@ -72,16 +72,27 @@
 #define _UC_MACHINE_PC(uc)     ((uc)->uc_mcontext.__gregs[_REG_RIP])
 #define _UC_MACHINE_INTRV(uc)  ((uc)->uc_mcontext.__gregs[_REG_RAX])
 
 #define        _UC_MACHINE_SET_PC(uc, pc)      _UC_MACHINE_PC(uc) = (pc)
 
+#define        _UC_TLSBASE     0x00080000
+
 /*
  * mcontext extensions to handle signal delivery.
  */
 #define _UC_SETSTACK   0x00010000
 #define _UC_CLRSTACK   0x00020000
 
+static inline void *
+__lwp_getprivate_fast(void)
+{
+       void *__tmp;
+
+       __asm volatile("movq %%fs:0, %0" : "=r" (__tmp));
+
+       return __tmp;
+}
 
 #ifdef _KERNEL
 
 /*
  * 32bit context definitions.
@@ -114,18 +125,35 @@
 #define _UC_MACHINE32_SP(uc)   ((uc)->uc_mcontext.__gregs[_REG32_UESP])
 
 /*
  * Floating point register state
  */
-typedef struct fxsave64 __fpregset32_t;
+typedef struct {
+       union {
+               struct {
+                       int     __fp_state[27]; /* Environment and registers */
+                       int     __fp_status;    /* Software status word */
+               } __fpchip_state;
+               struct {
+                       char    __fp_emul[246];
+                       char    __fp_epad[2];
+               } __fp_emul_space;
+               struct {
+                       char    __fp_xmm[512];
+               } __fp_xmm_state;
+               int     __fp_fpregs[128];
+       } __fp_reg_set;
+       int     __fp_wregs[33];                 /* Weitek? */
+} __fpregset32_t;
 
 typedef struct {
        __gregset32_t   __gregs;
        __fpregset32_t  __fpregs;
+       uint32_t        _mc_tlsbase;
 } mcontext32_t;
 
-#define _UC_MACHINE_PAD32      5
+#define _UC_MACHINE_PAD32      4
 
 struct trapframe;
 struct lwp;
 int check_mcontext(struct lwp *, const mcontext_t *, struct trapframe *);
 

Index: src/sys/arch/amd64/include/types.h
===================================================================
--- src/sys/arch/amd64/include/types.h
+++ src/sys/arch/amd64/include/types.h
@@ -83,10 +83,11 @@
 #define        __HAVE_SYSCALL_INTERN
 #define        __HAVE_MINIMAL_EMUL
 #define        __HAVE_ATOMIC64_OPS
 #define        __HAVE_ATOMIC_AS_MEMBAR
 #define        __HAVE_CPU_LWP_SETPRIVATE
+#define        __HAVE___LWP_GETPRIVATE_FAST
 #define        __HAVE_INTR_CONTROL
 
 #ifdef _KERNEL_OPT
 #include "opt_xen.h"
 #define        __HAVE_RAS

Index: src/sys/arch/arm/arm/sig_machdep.c
===================================================================
--- src/sys/arch/arm/arm/sig_machdep.c
+++ src/sys/arch/arm/arm/sig_machdep.c
@@ -197,10 +197,13 @@
 #ifdef ARMFPE
        /* Save Floating Point Register context. */
        arm_fpe_getcontext(p, (struct fpreg *)(void *)&mcp->fpregs);
        *flags |= _UC_FPU;
 #endif
+
+       mcp->_mc_tlsbase = (uintptr_t)l->l_private;
+       *flags |= _UC_TLSBASE;
 }
 
 int
 cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
 {
@@ -237,10 +240,13 @@
        if ((flags & _UC_FPU) != 0) {
                /* Restore Floating Point Register context. */
                arm_fpe_setcontext(p, (struct fpreg *)(void *)&mcp->__fpregs);
        }
 #endif
+
+       if ((flags & _UC_TLSBASE) != 0)
+               lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
 
        mutex_enter(p->p_lock);
        if (flags & _UC_SETSTACK)
                l->l_sigstk.ss_flags |= SS_ONSTACK;
        if (flags & _UC_CLRSTACK)

Index: src/sys/arch/arm/include/mcontext.h
===================================================================
--- src/sys/arch/arm/include/mcontext.h
+++ src/sys/arch/arm/include/mcontext.h
@@ -85,23 +85,26 @@
        __gregset_t     __gregs;
        union {
                __fpregset_t __fpregs;
                __vfpregset_t __vfpregs;
        } __fpu;
+       __greg_t        _mc_tlsbase;
 } mcontext_t;
 
 /* Machine-dependent uc_flags */
 #define        _UC_ARM_VFP     0x00010000      /* FPU field is VFP */
 
 /* used by signal delivery to indicate status of signal stack */
 #define _UC_SETSTACK   0x00020000
 #define _UC_CLRSTACK   0x00040000
 
-#define _UC_MACHINE_PAD        3               /* Padding appended to 
ucontext_t */
+#define        _UC_TLSBASE     0x00080000
+
+#define _UC_MACHINE_PAD        2               /* Padding appended to 
ucontext_t */
 
 #define _UC_MACHINE_SP(uc)     ((uc)->uc_mcontext.__gregs[_REG_SP])
 #define _UC_MACHINE_PC(uc)     ((uc)->uc_mcontext.__gregs[_REG_PC])
 #define _UC_MACHINE_INTRV(uc)  ((uc)->uc_mcontext.__gregs[_REG_R0])
 
 #define        _UC_MACHINE_SET_PC(uc, pc)      _UC_MACHINE_PC(uc) = (pc)
 
 #endif /* !_ARM_MCONTEXT_H_ */

Index: src/sys/arch/hppa/hppa/hppa_machdep.c
===================================================================
--- src/sys/arch/hppa/hppa/hppa_machdep.c
+++ src/sys/arch/hppa/hppa/hppa_machdep.c
@@ -186,13 +186,13 @@
        gr[_REG_SR0] = tf->tf_sr0;
        gr[_REG_SR1] = tf->tf_sr1;
        gr[_REG_SR2] = tf->tf_sr2;
        gr[_REG_SR3] = tf->tf_sr3;
        gr[_REG_SR4] = tf->tf_sr4;
+       gr[_REG_CR27] = tf->tf_cr27;
 #if 0
        gr[_REG_CR26] = tf->tf_cr26;
-       gr[_REG_CR27] = tf->tf_cr27;
 #endif
 
        ras_pc = (__greg_t)ras_lookup(l->l_proc,
            (void *)(gr[_REG_PCOQH] & ~HPPA_PC_PRIV_MASK));
        if (ras_pc != -1) {
@@ -301,18 +301,20 @@
                        tf->tf_iioq_tail &= ~HPPA_PC_PRIV_MASK;
                } else {
                        tf->tf_iioq_tail |= HPPA_PC_PRIV_USER;
                }
 
+               lwp_setprivate(l, (void *)(uintptr_t)gr[_REG_CR27]);
+               tf->tf_cr27     = gr[_REG_CR27];
+
 #if 0
                tf->tf_sr0      = gr[_REG_SR0];
                tf->tf_sr1      = gr[_REG_SR1];
                tf->tf_sr2      = gr[_REG_SR2];
                tf->tf_sr3      = gr[_REG_SR3];
                tf->tf_sr4      = gr[_REG_SR4];
                tf->tf_cr26     = gr[_REG_CR26];
-               tf->tf_cr27     = gr[_REG_CR27];
 #endif
        }
 
        if ((flags & _UC_FPU) != 0) {
                struct pcb *pcb = lwp_getpcb(l);

Index: src/sys/arch/hppa/hppa/trap.S
===================================================================
--- src/sys/arch/hppa/hppa/trap.S
+++ src/sys/arch/hppa/hppa/trap.S
@@ -364,10 +364,13 @@
        mfctl   %pidr4, %arg3
        stw     %arg2, TF_CR12-TRAPFRAME_SIZEOF(%sr1, %t3)
        stw     %arg3, TF_CR13-TRAPFRAME_SIZEOF(%sr1, %t3)
 #endif
 
+       mfctl   CR_TLS, %arg0
+       stw     %arg0, TF_CR27-TRAPFRAME_SIZEOF(%sr1, %t3)
+
 #if defined(DDB) || defined(KGDB)
        /*
         * Save v2p translation table pointer
         */
        mfctl   %eirr, %arg0
@@ -505,10 +508,13 @@
        ldw     TF_CR12(%sr3, %t3), %t1
        ldw     TF_CR13(%sr3, %t3), %t2
        mtctl   %t1, %pidr3
        mtctl   %t2, %pidr4
 #endif
+       ldw     TF_CR27(%sr3, %t3), %t1
+       mtctl   %t1, CR_TLS
+
        ldw     TF_CR0(%sr3, %t3), %t1
        mtctl   %t1, %rctr
 
        ldw     TF_CR30(%sr3, %t3), %t1
        mtctl   %t1, CR_FPPADDR
@@ -2085,10 +2091,13 @@
 
        mfsp    %sr7, %t1
        mfctl   %pidr2, %t2
        stw     %t1, TF_SR7(%sr3, %t3)
        stw     %t2, TF_CR9(%sr3, %t3)
+
+       mfctl   CR_TLS, %t1
+       stw     %t1, TF_CR27(%sr3, %t3)
 
        mtsp    %r0, %sr0
        mtsp    %r0, %sr1
        mtsp    %r0, %sr2
        mtsp    %r0, %sr4

Index: src/sys/arch/hppa/hppa/vm_machdep.c
===================================================================
--- src/sys/arch/hppa/hppa/vm_machdep.c
+++ src/sys/arch/hppa/hppa/vm_machdep.c
@@ -40,10 +40,11 @@
 #include <sys/vnode.h>
 #include <sys/ptrace.h>
 #include <sys/exec.h>
 #include <sys/core.h>
 #include <sys/pool.h>
+#include <sys/cpu.h>
 
 #include <machine/cpufunc.h>
 #include <machine/pmap.h>
 #include <machine/pcb.h>
 
@@ -299,6 +300,16 @@
        pmap_remove(pmap, kva, kva + len);
        pmap_update(pmap);
        uvm_km_free(phys_map, kva, len, UVM_KMF_VAONLY);
        bp->b_data = bp->b_saveaddr;
        bp->b_saveaddr = NULL;
+}
+
+int
+cpu_lwp_setprivate(lwp_t *l, void *addr)
+{
+
+       l->l_md.md_regs->tf_cr27 = (u_int)addr;
+       if (l == curlwp)
+               mtctl(addr, CR_TLS);
+       return 0;
 }

Index: src/sys/arch/hppa/include/mcontext.h
===================================================================
--- src/sys/arch/hppa/include/mcontext.h
+++ src/sys/arch/hppa/include/mcontext.h
@@ -54,14 +54,24 @@
 #define        _UC_MACHINE_SET_PC(uc, pc)                                      
\
 do {                                                                   \
        (uc)->uc_mcontext.__gregs[_REG_PCOQH] = (pc);                   \
        (uc)->uc_mcontext.__gregs[_REG_PCOQT] = (pc) + 4;               \
 } while (/*CONSTCOND*/0)
+
+static inline void *
+__lwp_getprivate_fast(void)
+{
+       register void *__tmp;
+
+       __asm volatile("mfctl\t27 /* CR_TLS */, %0" : "=r" (__tmp));
+
+       return __tmp;
+}
 
 #endif /* !__ASSEMBLER__ */
 
 #define        _OFFSETOF_UC_GREGS 40
 
 #define        _UC_SETSTACK    0x00010000
 #define        _UC_CLRSTACK    0x00020000
 
 #endif /* _HPPA_MCONTEXT_H_ */

Index: src/sys/arch/hppa/include/types.h
===================================================================
--- src/sys/arch/hppa/include/types.h
+++ src/sys/arch/hppa/include/types.h
@@ -90,7 +90,10 @@
  * will construct PLABELs for them.  Make them "const char []" instead.
  */
 
 #define        RAS_DECL(name)                                                  
\
 extern const char __CONCAT(name,_ras_start[]), __CONCAT(name,_ras_end[])
+
+#define        __HAVE_CPU_LWP_SETPRIVATE
+#define        __HAVE___LWP_GETPRIVATE_FAST
 
 #endif /* _HPPA_TYPES_H_ */

Index: src/sys/arch/i386/i386/machdep.c
===================================================================
--- src/sys/arch/i386/i386/machdep.c
+++ src/sys/arch/i386/i386/machdep.c
@@ -1753,10 +1753,13 @@
            (void *) gr[_REG_EIP])) != -1)
                gr[_REG_EIP] = ras_eip;
 
        *flags |= _UC_CPU;
 
+       mcp->_mc_tlsbase = (uintptr_t)l->l_private;
+       *flags |= _UC_TLSBASE;
+
        /* Save floating point register context, if any. */
        if ((l->l_md.md_flags & MDL_USEDFPU) != 0) {
                struct pcb *pcb = lwp_getpcb(l);
 #if NNPX > 0
 
@@ -1842,10 +1845,13 @@
                tf->tf_eip    = gr[_REG_EIP];
                tf->tf_cs     = gr[_REG_CS];
                tf->tf_esp    = gr[_REG_UESP];
                tf->tf_ss     = gr[_REG_SS];
        }
+
+       if ((flags & _UC_TLSBASE) != 0)
+               lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
 
 #if NNPX > 0
        /*
         * If we were using the FPU, forget that we were.
         */

Index: src/sys/arch/i386/include/mcontext.h
===================================================================
--- src/sys/arch/i386/include/mcontext.h
+++ src/sys/arch/i386/include/mcontext.h
@@ -37,10 +37,11 @@
  * mcontext extensions to handle signal delivery.
  */
 #define _UC_SETSTACK   0x00010000
 #define _UC_CLRSTACK   0x00020000
 #define _UC_VM         0x00040000
+#define        _UC_TLSBASE     0x00080000
 
 /*
  * Layout of mcontext_t according to the System V Application Binary Interface,
  * Intel386(tm) Architecture Processor Supplement, Fourth Edition.
  */  
@@ -94,15 +95,16 @@
 } __fpregset_t;
 
 typedef struct {
        __gregset_t     __gregs;
        __fpregset_t    __fpregs;
+       __greg_t        _mc_tlsbase;
 } mcontext_t;
 
 #define _UC_FXSAVE     0x20    /* FP state is in FXSAVE format in XMM space */
 
-#define _UC_MACHINE_PAD        5       /* Padding appended to ucontext_t */
+#define _UC_MACHINE_PAD        4       /* Padding appended to ucontext_t */
 
 #define _UC_UCONTEXT_ALIGN     (~0xf)
 
 #ifdef _KERNEL_OPT
 #include "opt_vm86.h"
@@ -120,7 +122,17 @@
 #endif
 #define _UC_MACHINE_PC(uc)     ((uc)->uc_mcontext.__gregs[_REG_EIP])
 #define _UC_MACHINE_INTRV(uc)  ((uc)->uc_mcontext.__gregs[_REG_EAX])
 
 #define        _UC_MACHINE_SET_PC(uc, pc)      _UC_MACHINE_PC(uc) = (pc)
+
+static inline void *
+__lwp_getprivate_fast(void)
+{
+       void *__tmp;
+
+       __asm volatile("movl %%gs:0, %0" : "=r" (__tmp));
+
+       return __tmp;
+}
 
 #endif /* !_I386_MCONTEXT_H_ */

Index: src/sys/arch/i386/include/types.h
===================================================================
--- src/sys/arch/i386/include/types.h
+++ src/sys/arch/i386/include/types.h
@@ -110,11 +110,12 @@
 #define        __HAVE_OLD_DISKLABEL
 #define __HAVE_ATOMIC64_OPS
 #define        __HAVE_ATOMIC_AS_MEMBAR
 #define        __HAVE_CPU_LWP_SETPRIVATE
 #define        __HAVE_INTR_CONTROL
+#define        __HAVE___LWP_GETPRIVATE_FAST
 
 #if defined(_KERNEL)
 #define        __HAVE_RAS
 #endif
 
 #endif /* _I386_MACHTYPES_H_ */

Index: src/sys/arch/m68k/include/mcontext.h
===================================================================
--- src/sys/arch/m68k/include/mcontext.h
+++ src/sys/arch/m68k/include/mcontext.h
@@ -77,11 +77,11 @@
 
 typedef struct {
        __gregset_t     __gregs;        /* General Register set */
        __fpregset_t    __fpregs;       /* Floating Point Register set */
        union {
-               long    __mc_state[202];        /* Only need 308 bytes... */
+               long    __mc_state[201];        /* Only need 308 bytes... */
 #if defined(_KERNEL) || defined(__M68K_MCONTEXT_PRIVATE)
                struct {
                        /* Rest of the frame. */
                        unsigned int    __mcf_format;
                        unsigned int    __mcf_vector;
@@ -90,19 +90,21 @@
                        union FPF_u1    __mcf_fpf_u1;
                        union FPF_u2    __mcf_fpf_u2;
                } __mc_frame;
 #endif /* _KERNEL || __M68K_MCONTEXT_PRIVATE */
        }               __mc_pad;
+       __greg_t        _mc_tlsbase;
 } mcontext_t;
 
 /* Note: no additional padding is to be performed in ucontext_t. */
 
 /* Machine-specific uc_flags value */
 #define _UC_M68K_UC_USER 0x40000000
+#define        _UC_TLSBASE     0x00080000
 
 #define _UC_MACHINE_SP(uc)     ((uc)->uc_mcontext.__gregs[_REG_A7])
 #define _UC_MACHINE_PC(uc)     ((uc)->uc_mcontext.__gregs[_REG_PC])
 #define _UC_MACHINE_INTRV(uc)  ((uc)->uc_mcontext.__gregs[_REG_D0])
 
 #define        _UC_MACHINE_SET_PC(uc, pc)      _UC_MACHINE_PC(uc) = (pc)
 
 #endif /* !_M68K_MCONTEXT_H_ */

Index: src/sys/arch/m68k/m68k/sig_machdep.c
===================================================================
--- src/sys/arch/m68k/m68k/sig_machdep.c
+++ src/sys/arch/m68k/m68k/sig_machdep.c
@@ -259,10 +259,13 @@
            (void *) gr[_REG_PC])) != -1)
                gr[_REG_PC] = ras_pc;
 
        *flags |= _UC_CPU;
 
+       mcp->_mc_tlsbase = (uintptr_t)l->l_private;
+       *flags |= _UC_TLSBASE;
+
        /* Save exception frame information. */
        mcp->__mc_pad.__mc_frame.__mcf_format = format;
        if (format >= FMT4) {
                mcp->__mc_pad.__mc_frame.__mcf_vector = frame->f_vector;
                (void)memcpy(&mcp->__mc_pad.__mc_frame.__mcf_exframe,
@@ -418,10 +421,13 @@
                 * (from the PCB) when this lwp is given the CPU.
                 */
                if (l == curlwp)
                        m68881_restore(fpf);
        }
+
+       if ((flags & _UC_TLSBASE) != 0)
+               lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
 
        mutex_enter(l->l_proc->p_lock);
        if (flags & _UC_SETSTACK)
                l->l_sigstk.ss_flags |= SS_ONSTACK;
        if (flags & _UC_CLRSTACK)

Index: src/sys/arch/mips/include/mcontext.h
===================================================================
--- src/sys/arch/mips/include/mcontext.h
+++ src/sys/arch/mips/include/mcontext.h
@@ -123,34 +123,38 @@
 #endif
 
 typedef struct {
        __gregset_t     __gregs;
        __fpregset_t    __fpregs;
+       __greg_t        _mc_tlsbase;
 } mcontext_t;
 
 #if defined(_KERNEL) && defined(_LP64)
 typedef        __int32_t       __greg32_t;
 typedef __greg32_t     __gregset32_t[_NGREG];
 
 typedef struct {
        __gregset32_t           __gregs;
        struct __fpregset_oabi  __fpregs;
+       __greg_t                _mc_tlsbase;
 } mcontext_o32_t;
 
 typedef struct {
        __gregset_t             __gregs;
        struct __fpregset_nabi  __fpregs;
+       __greg_t                _mc_tlsbase;
 } mcontext32_t;
 
 #endif /* _KERNEL && _LP64 */
 
 #endif /* !__ASSEMBLER__ */
 
-#define _UC_MACHINE_PAD        16      /* Padding appended to ucontext_t */
+#define _UC_MACHINE_PAD        15      /* Padding appended to ucontext_t */
 
 #define        _UC_SETSTACK    0x00010000
 #define        _UC_CLRSTACK    0x00020000
+#define        _UC_TLSBASE     0x00040000
 
 #define _UC_MACHINE_SP(uc)     ((uc)->uc_mcontext.__gregs[_REG_SP])
 #define _UC_MACHINE_PC(uc)     ((uc)->uc_mcontext.__gregs[_REG_EPC])
 #define _UC_MACHINE_INTRV(uc)  ((uc)->uc_mcontext.__gregs[_REG_V0])
 
@@ -159,7 +163,16 @@
 #define _UC_MACHINE32_SP(uc)   _UC_MACHINE_SP(uc)
 #define _UC_MACHINE32_PC(uc)   _UC_MACHINE_PC(uc)
 #define _UC_MACHINE32_INTRV(uc)        _UC_MACHINE_INTRV(uc)
 
 #define        _UC_MACHINE32_SET_PC(uc, pc)    _UC_MACHINE_PC((uc), (pc))
+
+static inline void *
+__lwp_getprivate_fast(void)
+{
+       register void *__tcb;
+
+       __asm volatile(".set push; .set mips32r2; rdhwr %0, $29; .set pop" : "=v"(__tcb));
+       return __tcb;
+}
 
 #endif /* _MIPS_MCONTEXT_H_ */

Index: src/sys/arch/mips/include/types.h
===================================================================
--- src/sys/arch/mips/include/types.h
+++ src/sys/arch/mips/include/types.h
@@ -133,10 +133,11 @@
 #define        __HAVE_PROCESS_XFPREGS
 #define        __HAVE_CPU_DATA_FIRST
 #ifdef MIPS3_PLUS      /* XXX bogus! */
 #define        __HAVE_CPU_COUNTER
 #endif
+#define        __HAVE___LWP_GETPRIVATE_FAST
 
 #if !defined(__mips_o32)
 #define        __HAVE_ATOMIC64_OPS
 #endif
 

Index: src/sys/arch/mips/mips/mips_machdep.c
===================================================================
--- src/sys/arch/mips/mips/mips_machdep.c
+++ src/sys/arch/mips/mips/mips_machdep.c
@@ -2065,10 +2065,13 @@
            (void *) (intptr_t)gr[_REG_EPC])) != -1)
                gr[_REG_EPC] = ras_pc;
 
        *flags |= _UC_CPU;
 
+       mcp->_mc_tlsbase = (uintptr_t)l->l_private;
+       *flags |= _UC_TLSBASE;
+
        /* Save floating point register context, if any. */
        if (l->l_md.md_flags & MDP_FPUSED) {
                struct pcb *pcb;
                size_t fplen;
 
@@ -2142,10 +2145,13 @@
                 * proper size of fpreg when copying.
                 */
                pcb = lwp_getpcb(l);
                memcpy(&pcb->pcb_fpregs, &mcp->__fpregs, fplen);
        }
+
+       if ((flags & _UC_TLSBASE) != 0)
+               lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
 
        mutex_enter(p->p_lock);
        if (flags & _UC_SETSTACK)
                l->l_sigstk.ss_flags |= SS_ONSTACK;
        if (flags & _UC_CLRSTACK)

Index: src/sys/arch/mips/mips/netbsd32_machdep.c
===================================================================
--- src/sys/arch/mips/mips/netbsd32_machdep.c
+++ src/sys/arch/mips/mips/netbsd32_machdep.c
@@ -260,10 +260,12 @@
        for (i = 0; i < __arraycount(mc.__gregs); i++)
                mco32->__gregs[i] = mc.__gregs[i];
        if (*flagsp & _UC_FPU)
                memcpy(&mco32->__fpregs, &mc.__fpregs,
                    sizeof(struct fpreg_oabi));
+       mco32->_mc_tlsbase = mc._mc_tlsbase;
+       *flagsp |= _UC_TLSBASE;
 }
 
 int
 cpu_setmcontext32(struct lwp *l, const mcontext32_t *mc32, unsigned int flags)
 {
@@ -277,10 +279,11 @@
        for (i = 0; i < __arraycount(mc.__gregs); i++)
                mc.__gregs[i] = mco32->__gregs[i];
        if (flags & _UC_FPU)
                memcpy(&mc.__fpregs, &mco32->__fpregs,
                    sizeof(struct fpreg_oabi));
+       mc._mc_tlsbase = mco32->_mc_tlsbase;
        return cpu_setmcontext(l, &mc, flags);
 }
 
 #ifdef COREDUMP
 /*

Index: src/sys/arch/powerpc/include/mcontext.h
===================================================================
--- src/sys/arch/powerpc/include/mcontext.h
+++ src/sys/arch/powerpc/include/mcontext.h
@@ -119,7 +119,15 @@
 #define _UC_MACHINE_SP(uc)     ((uc)->uc_mcontext.__gregs[_REG_R1])
 #define _UC_MACHINE_PC(uc)     ((uc)->uc_mcontext.__gregs[_REG_PC])
 #define _UC_MACHINE_INTRV(uc)  ((uc)->uc_mcontext.__gregs[_REG_R3])
 
 #define        _UC_MACHINE_SET_PC(uc, pc)      _UC_MACHINE_PC(uc) = (pc)
+
+static inline void *
+__lwp_getprivate_fast(void)
+{
+       register void *__tmp __asm__("r2");
+
+       return __tmp;
+}
 
 #endif /* !_POWERPC_MCONTEXT_H_ */

Index: src/sys/arch/powerpc/include/types.h
===================================================================
--- src/sys/arch/powerpc/include/types.h
+++ src/sys/arch/powerpc/include/types.h
@@ -76,7 +76,8 @@
 #define __HAVE_CPU_LWP_SETPRIVATE
 #define        __HAVE_CPU_DATA_FIRST
 #ifdef _LP64
 #define        __HAVE_ATOMIC64_OPS
 #endif
+#define        __HAVE___LWP_GETPRIVATE_FAST
 
 #endif /* _MACHTYPES_H_ */

Index: src/sys/arch/sh3/include/mcontext.h
===================================================================
--- src/sys/arch/sh3/include/mcontext.h
+++ src/sys/arch/sh3/include/mcontext.h
@@ -91,7 +91,16 @@
  * Machine dependent uc_flags
  */
 #define        _UC_SETSTACK            0x10000
 #define        _UC_CLRSTACK            0x20000
 
+static inline void *
+__lwp_getprivate_fast(void)
+{
+       register void *__gbr;
+
+       __asm volatile("stc gbr, %0" : "=r" (__gbr));
+
+       return __gbr;
+}
 
 #endif /* !_SH3_MCONTEXT_H_ */

Index: src/sys/arch/sh3/include/types.h
===================================================================
--- src/sys/arch/sh3/include/types.h
+++ src/sys/arch/sh3/include/types.h
@@ -74,7 +74,10 @@
 #define        __HAVE_CPU_DATA_FIRST
 
 #if defined(_KERNEL)
 #define        __HAVE_RAS
 #endif
+
+#define        __HAVE_CPU_LWP_SETPRIVATE
+#define        __HAVE___LWP_GETPRIVATE_FAST
 
 #endif /* !_SH3_TYPES_H_ */

Index: src/sys/arch/sh3/sh3/sh3_machdep.c
===================================================================
--- src/sys/arch/sh3/sh3/sh3_machdep.c
+++ src/sys/arch/sh3/sh3/sh3_machdep.c
@@ -86,10 +86,11 @@
 #include <sys/ras.h>
 #include <sys/sa.h>
 #include <sys/savar.h>
 #include <sys/syscallargs.h>
 #include <sys/ucontext.h>
+#include <sys/cpu.h>
 
 #ifdef KGDB
 #include <sys/kgdb.h>
 #ifndef KGDB_DEVNAME
 #define        KGDB_DEVNAME "nodev"
@@ -512,10 +513,12 @@
                tf->tf_r3     = gr[_REG_R3];
                tf->tf_r2     = gr[_REG_R2];
                tf->tf_r1     = gr[_REG_R1];
                tf->tf_r0     = gr[_REG_R0];
                tf->tf_r15    = gr[_REG_R15];
+
+               lwp_setprivate(l, (void *)(uintptr_t)gr[_REG_GBR]);
        }
 
 #if 0
        /* XXX: FPU context is currently not handled by the kernel. */
        if (flags & _UC_FPU) {
@@ -583,6 +586,14 @@
 
 #ifndef __lint__
        goto *(void *)0xa0000000;
 #endif
        /* NOTREACHED */
+}
+
+int
+cpu_lwp_setprivate(lwp_t *l, void *addr)
+{
+
+       l->l_md.md_regs->tf_gbr = (int)addr;
+       return 0;
 }

Index: src/sys/arch/sparc/include/mcontext.h
===================================================================
--- src/sys/arch/sparc/include/mcontext.h
+++ src/sys/arch/sparc/include/mcontext.h
@@ -157,7 +157,17 @@
 #define        _UC_MACHINE_SET_PC(uc, pc)                                      \
 do {                                                                   \
        (uc)->uc_mcontext.__gregs[_REG_PC] = (pc);                      \
        (uc)->uc_mcontext.__gregs[_REG_nPC] = (pc) + 4;                 \
 } while (/*CONSTCOND*/0)
+
+static inline void *
+__lwp_getprivate_fast(void)
+{
+       register void *__tmp;
+
+       __asm volatile("mov %%g7, %0" : "=r" (__tmp));
+
+       return __tmp;
+}
 
 #endif /* !_SPARC_MCONTEXT_H_ */

Index: src/sys/arch/sparc/include/types.h
===================================================================
--- src/sys/arch/sparc/include/types.h
+++ src/sys/arch/sparc/include/types.h
@@ -125,7 +125,9 @@
 #if defined(_KERNEL)
 #define __HAVE_RAS
 #endif
 #endif
 
+#define        __HAVE_CPU_LWP_SETPRIVATE
+#define        __HAVE___LWP_GETPRIVATE_FAST
 
 #endif /* _MACHTYPES_H_ */

Index: src/sys/arch/sparc/sparc/machdep.c
===================================================================
--- src/sys/arch/sparc/sparc/machdep.c
+++ src/sys/arch/sparc/sparc/machdep.c
@@ -775,10 +775,12 @@
                tf->tf_out[3] = r[_REG_O3];
                tf->tf_out[4] = r[_REG_O4];
                tf->tf_out[5] = r[_REG_O5];
                tf->tf_out[6] = r[_REG_O6];
                tf->tf_out[7] = r[_REG_O7];
+
+               lwp_setprivate(l, (void *)(uintptr_t)r[_REG_G7]);
        }
 
 #ifdef FPU_CONTEXT
        if (flags & _UC_FPU) {
                /*

Index: src/sys/arch/sparc/sparc/vm_machdep.c
===================================================================
--- src/sys/arch/sparc/sparc/vm_machdep.c
+++ src/sys/arch/sparc/sparc/vm_machdep.c
@@ -60,10 +60,11 @@
 #include <sys/malloc.h>
 #include <sys/buf.h>
 #include <sys/exec.h>
 #include <sys/vnode.h>
 #include <sys/simplelock.h>
+#include <sys/cpu.h>
 
 #include <uvm/uvm_extern.h>
 
 #include <machine/cpu.h>
 #include <machine/frame.h>
@@ -341,6 +342,16 @@
 
        pcb->pcb_pc = (int)lwp_setfunc_trampoline - 8;
        pcb->pcb_sp = (int)rp;
        pcb->pcb_psr &= ~PSR_CWP;       /* Run in window #0 */
        pcb->pcb_wim = 1;               /* Fence at window #1 */
+}
+
+int
+cpu_lwp_setprivate(lwp_t *l, void *addr)
+{
+       struct trapframe *tf = l->l_md.md_tf;
+
+       tf->tf_global[7] = (uintptr_t)addr;
+
+       return 0;
 }

Index: src/sys/arch/sparc64/sparc64/netbsd32_machdep.c
===================================================================
--- src/sys/arch/sparc64/sparc64/netbsd32_machdep.c
+++ src/sys/arch/sparc64/sparc64/netbsd32_machdep.c
@@ -1225,10 +1225,12 @@
                tf->tf_out[4]    = (uint64_t)gr[_REG32_O4];
                tf->tf_out[5]    = (uint64_t)gr[_REG32_O5];
                tf->tf_out[6]    = (uint64_t)gr[_REG32_O6];
                tf->tf_out[7]    = (uint64_t)gr[_REG32_O7];
                /* %asi restored above; %fprs not yet supported. */
+
+               lwp_setprivate(l, (void *)(uintptr_t)gr[_REG32_G7]);
 
                /* XXX mcp->__gwins */
        }
 
        /* Restore floating point register context, if any. */

Index: src/sys/arch/sparc64/sparc64/vm_machdep.c
===================================================================
--- src/sys/arch/sparc64/sparc64/vm_machdep.c
+++ src/sys/arch/sparc64/sparc64/vm_machdep.c
@@ -59,10 +59,11 @@
 #include <sys/proc.h>
 #include <sys/core.h>
 #include <sys/buf.h>
 #include <sys/exec.h>
 #include <sys/vnode.h>
+#include <sys/cpu.h>
 
 #include <uvm/uvm_extern.h>
 
 #include <machine/cpu.h>
 #include <machine/frame.h>
@@ -351,6 +352,16 @@
 {
        struct fpstate64 *fs;
 
        if ((fs = l->l_md.md_fpstate) != NULL)
                pool_cache_put(fpstate_cache, fs);
+}
+
+int
+cpu_lwp_setprivate(lwp_t *l, void *addr)
+{
+       struct trapframe *tf = l->l_md.md_tf;
+
+       tf->tf_global[7] = (uintptr_t)addr;
+
+       return 0;
 }



Home | Main Index | Thread Index | Old Index