Subject: proposed fix for awkward allocation in sa_switch()
To: None <tech-kern@netbsd.org>
From: Nathan J. Williams <nathanw@wasabisystems.com>
List: tech-kern
Date: 03/03/2004 15:19:30
--=enigma-analyzer-Lon-Horiuchi-offensive-information-warfare-TWA-Consu
I recently went and re-examined a memory-allocation problem in
sa_switch(), where we need to allocate a structure while inside
tsleep(). Even with all the WAITOK flags turned off, this causes
problems when the pool code tries to wake up the pagedaemon with the
scheduler lock held.
There's a comment in the code that "It would be nice if we could
safely allocate the sadata_upcall structure on the stack". It can't be
allocated on the caller's stack, because the caller may wake up and
return to userland asynchronously with the new LWP using the
structure. However, since the new LWP is the one that needs the
structure, it can be allocated on the new LWP's stack, with some
slight abuse of proc_trampoline to clean up the stack space, and
adjusting the two users of proc_trampoline per architecture. It does
depend (as implemented so far) on being able to pass one more bit of
data to proc_trampoline; I haven't looked at all architectures yet,
but this seems doable.
Comments?
- Nathan
--=enigma-analyzer-Lon-Horiuchi-offensive-information-warfare-TWA-Consu
Content-Type: text/x-patch
Content-Disposition: attachment; filename=sadiff
Content-Description: sa upcall allocation patch
Index: sys/lwp.h
===================================================================
RCS file: /cvsroot/src/sys/sys/lwp.h,v
retrieving revision 1.19
diff -u -p -r1.19 lwp.h
--- sys/lwp.h 11 Jan 2004 18:37:52 -0000 1.19
+++ sys/lwp.h 3 Mar 2004 20:04:46 -0000
@@ -175,7 +175,7 @@ int newlwp(struct lwp *, struct proc *,
#define LWPWAIT_EXITCONTROL 0x00000001
int lwp_wait1(struct lwp *, lwpid_t, lwpid_t *, int);
void lwp_continue(struct lwp *);
-void cpu_setfunc(struct lwp *, void (*)(void *), void *);
+void *cpu_setfunc(struct lwp *, void (*)(void *), size_t, void *);
void startlwp(void *);
void upcallret(struct lwp *);
void lwp_exit (struct lwp *);
Index: sys/savar.h
===================================================================
RCS file: /cvsroot/src/sys/sys/savar.h,v
retrieving revision 1.14
diff -u -p -r1.14 savar.h
--- sys/savar.h 2 Jan 2004 18:52:17 -0000 1.14
+++ sys/savar.h 3 Mar 2004 20:04:46 -0000
@@ -70,6 +70,7 @@ struct sadata_upcall {
#define SAU_FLAG_DEFERRED_EVENT 0x1
#define SAU_FLAG_DEFERRED_INTERRUPTED 0x2
+#define SAU_FLAG_STACKALLOC 0x4
#define SA_UPCALL_TYPE_MASK 0x00FF
Index: kern/kern_sa.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_sa.c,v
retrieving revision 1.47
diff -u -p -r1.47 kern_sa.c
--- kern/kern_sa.c 2 Jan 2004 18:52:17 -0000 1.47
+++ kern/kern_sa.c 3 Mar 2004 20:04:46 -0000
@@ -138,7 +138,8 @@ sadata_upcall_free(struct sadata_upcall
}
}
- pool_put(&saupcall_pool, sau);
+ if ((sau->sau_flags & SAU_FLAG_STACKALLOC) == 0)
+ pool_put(&saupcall_pool, sau);
}
int
@@ -898,24 +899,8 @@ sa_switch(struct lwp *l, int type)
return;
}
- /*
- * XXX We need to allocate the sadata_upcall structure here,
- * XXX since we can't sleep while waiting for memory inside
- * XXX sa_upcall(). It would be nice if we could safely
- * XXX allocate the sadata_upcall structure on the stack, here.
- */
- sau = sadata_upcall_alloc(0);
- if (sau == NULL) {
-#ifdef DIAGNOSTIC
- printf("sa_switch(%d.%d): couldn't allocate upcall data.\n",
- p->p_pid, l->l_lid);
-#endif
- sa_putcachelwp(p, l2); /* PHOLD from sa_getcachelwp */
- mi_switch(l, NULL);
- return;
- }
-
- cpu_setfunc(l2, sa_switchcall, sau);
+ sau = cpu_setfunc(l2, sa_switchcall,
+ sizeof(struct sadata_upcall), NULL);
error = sa_upcall0(l2, SA_UPCALL_BLOCKED, l, NULL, 0, NULL,
sau);
if (error) {
@@ -927,6 +912,7 @@ sa_switch(struct lwp *l, int type)
mi_switch(l, NULL);
return;
}
+ sau->sau_flags |= SAU_FLAG_STACKALLOC;
/*
* Perform the double/upcall pagefault check.
Index: arch/i386/i386/vm_machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/vm_machdep.c,v
retrieving revision 1.116
diff -u -p -r1.116 vm_machdep.c
--- arch/i386/i386/vm_machdep.c 6 Feb 2004 10:28:03 -0000 1.116
+++ arch/i386/i386/vm_machdep.c 3 Mar 2004 20:04:47 -0000
@@ -198,24 +198,32 @@ cpu_lwp_fork(struct lwp *l1, struct lwp
sf = (struct switchframe *)tf - 1;
sf->sf_esi = (int)func;
+ sf->sf_edi = 0;
sf->sf_ebx = (int)arg;
sf->sf_eip = (int)proc_trampoline;
pcb->pcb_esp = (int)sf;
pcb->pcb_ebp = 0;
}
-void
-cpu_setfunc(struct lwp *l, void (*func)(void *), void *arg)
+void *
+cpu_setfunc(struct lwp *l, void (*func)(void *), size_t space, void *arg)
{
struct pcb *pcb = &l->l_addr->u_pcb;
struct trapframe *tf = l->l_md.md_regs;
- struct switchframe *sf = (struct switchframe *)tf - 1;
+ void *spaceptr = (char *)tf - space;
+ struct switchframe *sf = (struct switchframe *)spaceptr - 1;
+
+ if (space != 0)
+ arg = spaceptr;
sf->sf_esi = (int)func;
+ sf->sf_edi = (int)space;
sf->sf_ebx = (int)arg;
sf->sf_eip = (int)proc_trampoline;
pcb->pcb_esp = (int)sf;
pcb->pcb_ebp = 0;
+
+ return arg;
}
void
Index: arch/i386/i386/locore.S
===================================================================
RCS file: /cvsroot/src/sys/arch/i386/i386/locore.S,v
retrieving revision 1.24
diff -u -p -r1.24 locore.S
--- arch/i386/i386/locore.S 20 Feb 2004 17:35:01 -0000 1.24
+++ arch/i386/i386/locore.S 3 Mar 2004 20:04:47 -0000
@@ -701,6 +701,7 @@ NENTRY(proc_trampoline)
pushl %ebx
call *%esi
addl $4,%esp
+ addl %edi,%esp
DO_DEFERRED_SWITCH(%eax)
INTRFASTEXIT
/* NOTREACHED */
Index: arch/alpha/alpha/vm_machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/alpha/alpha/vm_machdep.c,v
retrieving revision 1.82
diff -u -p -r1.82 vm_machdep.c
--- arch/alpha/alpha/vm_machdep.c 5 Jan 2004 23:51:19 -0000 1.82
+++ arch/alpha/alpha/vm_machdep.c 3 Mar 2004 20:04:47 -0000
@@ -209,35 +209,41 @@ cpu_lwp_fork(struct lwp *l1, struct lwp
up->u_pcb.pcb_context[0] =
(u_int64_t)func; /* s0: pc */
up->u_pcb.pcb_context[1] =
- (u_int64_t)exception_return; /* s1: ra */
+ (u_int64_t)arg; /* s1: arg */
up->u_pcb.pcb_context[2] =
- (u_int64_t)arg; /* s2: arg */
+ (u_int64_t)0; /* s2: size */
up->u_pcb.pcb_context[7] =
(u_int64_t)proc_trampoline; /* ra: assembly magic */
up->u_pcb.pcb_context[8] = ALPHA_PSL_IPL_0; /* ps: IPL */
}
}
-void
-cpu_setfunc(l, func, arg)
+void *
+cpu_setfunc(l, func, space, arg)
struct lwp *l;
void (*func) __P((void *));
+ size_t space;
void *arg;
{
struct user *up = l->l_addr;
+ void *spaceptr = (char *)l->l_md.md_tf - space;
+
+ if (space != 0)
+ arg = spaceptr;
up->u_pcb.pcb_hw.apcb_ksp =
- (u_int64_t)l->l_md.md_tf;
+ (u_int64_t)l->l_md.md_tf - space;
up->u_pcb.pcb_context[0] =
(u_int64_t)func; /* s0: pc */
up->u_pcb.pcb_context[1] =
- (u_int64_t)exception_return; /* s1: ra */
+ (u_int64_t)arg; /* s1: arg */
up->u_pcb.pcb_context[2] =
- (u_int64_t)arg; /* s2: arg */
+ (u_int64_t)space; /* s2: size */
up->u_pcb.pcb_context[7] =
(u_int64_t)proc_trampoline; /* ra: assembly magic */
up->u_pcb.pcb_context[8] = ALPHA_PSL_IPL_0; /* ps: IPL */
+ return arg;
}
/*
Index: arch/alpha/alpha/locore.s
===================================================================
RCS file: /cvsroot/src/sys/arch/alpha/alpha/locore.s,v
retrieving revision 1.105
diff -u -p -r1.105 locore.s
--- arch/alpha/alpha/locore.s 4 Nov 2003 10:33:16 -0000 1.105
+++ arch/alpha/alpha/locore.s 3 Mar 2004 20:04:48 -0000
@@ -984,9 +984,10 @@ LEAF_NOPROFILE(proc_trampoline, 0)
CALL(proc_trampoline_mp)
#endif
mov s0, pv
- mov s1, ra
- mov s2, a0
- jmp zero, (pv)
+ mov s1, a0
+ jsr ra, (pv)
+ addq sp, s2, sp
+ br exception_return
END(proc_trampoline)
/*
Index: arch/arm/arm32/vm_machdep.c
===================================================================
RCS file: /cvsroot/src/sys/arch/arm/arm32/vm_machdep.c,v
retrieving revision 1.31
diff -u -p -r1.31 vm_machdep.c
--- arch/arm/arm32/vm_machdep.c 4 Jan 2004 11:33:29 -0000 1.31
+++ arch/arm/arm32/vm_machdep.c 3 Mar 2004 20:04:48 -0000
@@ -199,21 +199,29 @@ cpu_lwp_fork(l1, l2, stack, stacksize, f
sf = (struct switchframe *)tf - 1;
sf->sf_r4 = (u_int)func;
sf->sf_r5 = (u_int)arg;
+ sf->sf_r6 = (u_int)0;
sf->sf_pc = (u_int)proc_trampoline;
pcb->pcb_un.un_32.pcb32_sp = (u_int)sf;
}
-void
-cpu_setfunc(struct lwp *l, void (*func)(void *), void *arg)
+void *
+cpu_setfunc(struct lwp *l, void (*func)(void *), size_t space, void *arg)
{
struct pcb *pcb = &l->l_addr->u_pcb;
struct trapframe *tf = pcb->pcb_tf;
- struct switchframe *sf = (struct switchframe *)tf - 1;
+ void *spaceptr = (char *)tf - space;
+ struct switchframe *sf = (struct switchframe *)spaceptr - 1;
+
+ if (space != 0)
+ arg = spaceptr;
sf->sf_r4 = (u_int)func;
sf->sf_r5 = (u_int)arg;
+ sf->sf_r6 = (u_int)space;
sf->sf_pc = (u_int)proc_trampoline;
pcb->pcb_un.un_32.pcb32_sp = (u_int)sf;
+
+ return arg;
}
/*
Index: arch/arm/arm32/cpuswitch.S
===================================================================
RCS file: /cvsroot/src/sys/arch/arm/arm32/cpuswitch.S,v
retrieving revision 1.41
diff -u -p -r1.41 cpuswitch.S
--- arch/arm/arm32/cpuswitch.S 15 Nov 2003 08:44:18 -0000 1.41
+++ arch/arm/arm32/cpuswitch.S 3 Mar 2004 20:04:49 -0000
@@ -1023,6 +1023,7 @@ ENTRY(proc_trampoline)
mov lr, pc
mov pc, r4
+ add sp, sp, r6
/* Kill irq's */
mrs r0, cpsr
orr r0, r0, #(I32_bit)
--=enigma-analyzer-Lon-Horiuchi-offensive-information-warfare-TWA-Consu--