Subject: Re: compat/linux32: getpid() returning wrong pid after clone()
To: Edgar =?iso-8859-1?B?RnXf?= <ef@math.uni-bonn.de>
From: Nicolas Joly <njoly@pasteur.fr>
List: port-amd64
Date: 03/15/2007 22:51:55
--Nq2Wo0NMKNjxTN9z
Content-Type: text/plain; charset=iso-8859-1
Content-Disposition: inline
Content-Transfer-Encoding: 8bit

On Thu, Mar 15, 2007 at 09:57:03PM +0100, Edgar Fuß wrote:
> I'm confused.
> (Syscall #158 actually is implemented in the kernel I'm using. I  
> simply don't know how to tell kdump it is.)

You simply need to rebuild kdump with your updated kernel sources.

>   5183      1 dsmc     RET   clone 5247/0x147f
>   5183      1 dsmc     CALL  kill(0x83b, SIGRT0)
>   5183      1 dsmc     RET   kill 0
>   2107      1 dsmc     RET   rt_sigsuspend -1 unknown errno 4
>   2107      1 dsmc     PSIG  SIGRT0 caught handler=0xfbf663d0 mask= 
> (2,3,15,24,25,32))
>   2107      1 dsmc     CALL  sigreturn(0xffffcbdc)
>   2107      1 dsmc     RET   sigreturn -1 errno -2 No such file or  
> directory
>   2107      1 dsmc     CALL  #158 (unimplemented sched_yield)
>   5247      1 dsmc     EMUL  "linux32"
>   5247      1 dsmc     RET   fork 0
>   5247      1 dsmc     CALL  getpid
>   5247      1 dsmc     RET   getpid 2107/0x83b
> 
> I've been staring at this for minutes now, but probably others concur  
> that getpid(2) does not return what one would expect.
> 
> I've been unable to build a linux program exhibiting the same  
> behaviour myself, but I'm by far no linux expert.
> 
> Looks like either clone(2) or getpid(2) is seriously broken, isn't  
> it? However, digging through the sources I can't find anything  
> obviously wrong.

This was a known problem I fixed in -current.
<URL:http://mail-index.netbsd.org/source-changes/2007/02/05/0044.html>

The problem is that COMPAT_LINUX emulates a Linux v2.6 kernel (NPTL
threads emulation), but COMPAT_LINUX32 is limited to Linux v2.4 (old
linuxthreads emulation). And some syscalls such as getpid(2) have
different behaviour under the two versions ...

The fix needs to be pulled to the netbsd-4 branch. Can you test the
attached patch for me (I only have -current machines)?

-- 
Nicolas Joly

Biological Software and Databanks.
Institut Pasteur, Paris.

--Nq2Wo0NMKNjxTN9z
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="netbsd-linuxemultype.diff"

Index: sys/compat/linux/common/linux_emuldata.h
===================================================================
RCS file: /cvsroot/src/sys/compat/linux/common/linux_emuldata.h,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- sys/compat/linux/common/linux_emuldata.h	23 Aug 2006 19:49:09 -0000	1.12
+++ sys/compat/linux/common/linux_emuldata.h	5 Feb 2007 18:31:36 -0000	1.13
@@ -58,6 +58,7 @@
 };
 
 #define LINUX_LES_INEXITGROUP	0x1	/* thread group doing exit_group() */
+#define LINUX_LES_USE_NPTL	0x2	/* Need to emulate NPTL threads */
 
 struct linux_emuldata {
 #if notyet
Index: sys/compat/linux/common/linux_sched.c
===================================================================
RCS file: /cvsroot/src/sys/compat/linux/common/linux_sched.c,v
retrieving revision 1.38
retrieving revision 1.39
diff -u -r1.38 -r1.39
--- sys/compat/linux/common/linux_sched.c	5 Jan 2007 15:46:39 -0000	1.38
+++ sys/compat/linux/common/linux_sched.c	5 Feb 2007 18:31:36 -0000	1.39
@@ -366,46 +366,53 @@
 	struct linux_emuldata *led = p->p_emuldata;
 	struct linux_emuldata *e;
 
+	if (led->s->flags & LINUX_LES_USE_NPTL) {
+
 #ifdef DEBUG_LINUX
-	printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__, led->s->refs);
+		printf("%s:%d, led->s->refs = %d\n", __func__, __LINE__,
+		    led->s->refs);
 #endif
-	/*
-	 * The calling thread is supposed to kill all threads
-	 * in the same thread group (i.e. all threads created
-	 * via clone(2) with CLONE_THREAD flag set).
-	 *
-	 * If there is only one thread, things are quite simple
-	 */
-	if (led->s->refs == 1)
-		return sys_exit(l, v, retval);
+
+		/*
+		 * The calling thread is supposed to kill all threads
+		 * in the same thread group (i.e. all threads created
+		 * via clone(2) with CLONE_THREAD flag set).
+		 *
+		 * If there is only one thread, things are quite simple
+		 */
+		if (led->s->refs == 1)
+			return sys_exit(l, v, retval);
 
 #ifdef DEBUG_LINUX
-	printf("%s:%d\n", __func__, __LINE__);
+		printf("%s:%d\n", __func__, __LINE__);
 #endif
 
-	led->s->flags |= LINUX_LES_INEXITGROUP;
-	led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
+		led->s->flags |= LINUX_LES_INEXITGROUP;
+		led->s->xstat = W_EXITCODE(SCARG(uap, error_code), 0);
 
-	/*
-	 * Kill all threads in the group. The emulation exit hook takes
-	 * care of hiding the zombies and reporting the exit code properly
-	 */
-      	LIST_FOREACH(e, &led->s->threads, threads) {
-		if (e->proc == p)
-			continue;
+		/*
+		 * Kill all threads in the group. The emulation exit hook takes
+		 * care of hiding the zombies and reporting the exit code
+		 * properly.
+		 */
+      		LIST_FOREACH(e, &led->s->threads, threads) {
+			if (e->proc == p)
+				continue;
 
 #ifdef DEBUG_LINUX
-		printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
+			printf("%s: kill PID %d\n", __func__, e->proc->p_pid);
 #endif
-		psignal(e->proc, SIGKILL);
+			psignal(e->proc, SIGKILL);
+		}
+
+		/* Now, kill ourselves */
+		psignal(p, SIGKILL);
+		return 0;
+
 	}
+#endif /* LINUX_NPTL */
 
-	/* Now, kill ourselves */
-	psignal(p, SIGKILL);
-	return 0;
-#else /* LINUX_NPTL */
 	return sys_exit(l, v, retval);
-#endif /* LINUX_NPTL */
 }
 #endif /* !__m68k__ */
 
@@ -424,6 +431,8 @@
 	led = (struct linux_emuldata *)l->l_proc->p_emuldata;
 	led->clear_tid = SCARG(uap, tid);
 
+	led->s->flags |= LINUX_LES_USE_NPTL;
+
 	*retval = l->l_proc->p_pid;
 
 	return 0;
@@ -449,12 +458,14 @@
 	void *v;
 	register_t *retval;
 {
-	struct linux_emuldata *led;
-
-	led = l->l_proc->p_emuldata;
+	struct linux_emuldata *led = l->l_proc->p_emuldata;
 
-	/* The Linux kernel does it exactly that way */
-	*retval = led->s->group_pid;
+	if (led->s->flags & LINUX_LES_USE_NPTL) {
+		/* The Linux kernel does it exactly that way */
+		*retval = led->s->group_pid;
+	} else {
+		*retval = l->l_proc->p_pid;
+	}
 
 	return 0;
 }
@@ -471,23 +482,29 @@
 	struct proc *glp;
 	struct proc *pp;
 
-	/* Find the thread group leader's parent */
-	if ((glp = pfind(led->s->group_pid)) == NULL) {
-		/* Maybe panic... */
-		printf("linux_sys_getppid: missing group leader PID %d\n", 
-		    led->s->group_pid); 
-		return -1;
-	}
-	pp = glp->p_pptr;
+	if (led->s->flags & LINUX_LES_USE_NPTL) {
 
-	/* If this is a Linux process too, return thread group PID */
-	if (pp->p_emul == p->p_emul) {
-		struct linux_emuldata *pled;
+		/* Find the thread group leader's parent */
+		if ((glp = pfind(led->s->group_pid)) == NULL) {
+			/* Maybe panic... */
+			printf("linux_sys_getppid: missing group leader PID"
+			    " %d\n", led->s->group_pid); 
+			return -1;
+		}
+		pp = glp->p_pptr;
+
+		/* If this is a Linux process too, return thread group PID */
+		if (pp->p_emul == p->p_emul) {
+			struct linux_emuldata *pled;
+
+			pled = pp->p_emuldata;
+			*retval = pled->s->group_pid;
+		} else {
+			*retval = pp->p_pid;
+		}
 
-		pled = pp->p_emuldata;
-		*retval = pled->s->group_pid;
 	} else {
-		*retval = pp->p_pid;
+		*retval = p->p_pptr->p_pid;
 	}
 
 	return 0;

--Nq2Wo0NMKNjxTN9z--