NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: port-mips/59064 (jemalloc switch to 5.3 broke userland)



The following reply was made to PR port-mips/59064; it has been noted by GNATS.

From: Taylor R Campbell <riastradh%NetBSD.org@localhost>
To: Rin Okuyama <rokuyama.rk%gmail.com@localhost>
Cc: Martin Husemann <martin%duskware.de@localhost>, gnats-bugs%NetBSD.org@localhost,
	port-mips-maintainer%NetBSD.org@localhost, gnats-admin%NetBSD.org@localhost,
	netbsd-bugs%NetBSD.org@localhost, martin%NetBSD.org@localhost, simonb%NetBSD.org@localhost,
	joerg%NetBSD.org@localhost, dholland%NetBSD.org@localhost
Subject: Re: port-mips/59064 (jemalloc switch to 5.3 broke userland)
Date: Mon, 14 Apr 2025 16:52:14 +0000

 This is a multi-part message in MIME format.
 --=_3GH/d8a4GiEuk6sY511/abKYWvHzDwXu
 Content-Transfer-Encoding: quoted-printable
 
 [+cc another mips wizard]
 
 At this point I am well and truly baffled.
 
 Running the attached test program (same name as the other program but
 rather inaptly named now) under gdb, it works fine every time, with:
 
 (gdb) set environment LD_LIBRARY_PATH =3D /var/tmp/20250413/lib
 
 (/var/tmp/20250413/lib/libc.so.12 is built with jemalloc using
 initial-exec tls, while /lib/libc.so.12 is not -- this way programs
 generally work, including gdb, but I can test the broken libc with a
 target program.)
 
 If I set a breakpoint on malloc_default, and single-step through, I
 get to:
 
 (gdb) disas
 Dump of assembler code for function malloc_default:
    0x7853be08 <+0>:     addiu   sp,sp,-144
    0x7853be0c <+4>:     sd      gp,120(sp)
    0x7853be10 <+8>:     lui     gp,0x16
    0x7853be14 <+12>:    addu    gp,gp,t9
    0x7853be18 <+16>:    addiu   gp,gp,-31640
 =3D> 0x7853be1c <+20>:    lw      v0,-23368(gp)
    0x7853be20 <+24>:    .word   0x7c03e83b		/* rdhwr v1,$29 */
 ...
 (gdb) print (void *)$gp
 $4 =3D (void *) 0x78694270 <path+752>
 (gdb) x/xw $gp - 23368
 0x7868e728:     0xffff9008
 
 That looks fine (0xffff9008 =3D -28664, a plausible tls offset from
 $v1), and $v0 and $v1 hold reasonable values when I single-step
 through to
 
    0x7853be50 <+72>:    lbu     v1,600(a2)
 
 and it all works:
 
 (gdb) print (void *)$v0
 $8 =3D (void *) 0xffff9008
 (gdb) print (void *)$v1
 $9 =3D (void *) 0x786b9008
 
 But for some runs of
 
 $ LD_LIBRARY_PATH=3D/var/tmp/20250413/lib ./rdhwr
 
 it crashes at the lbu instruction, and gdb on the core dump shows:
 
 (gdb) disas
 Dump of assembler code for function malloc_default:
    0x7853be08 <+0>:     addiu   sp,sp,-144
    0x7853be0c <+4>:     sd      gp,120(sp)
    0x7853be10 <+8>:     lui     gp,0x16
    0x7853be14 <+12>:    addu    gp,gp,t9
    0x7853be18 <+16>:    addiu   gp,gp,-31640
    0x7853be1c <+20>:    lw      v0,-23368(gp)
    0x7853be20 <+24>:    .word   0x7c03e83b		/* rdhwr v1,$29 */
 ...
 =3D> 0x7853be50 <+72>:    lbu     v1,600(a2)
 ...
 (gdb) print (void *)$gp
 $4 =3D (void *) 0x78694270 <path+752>
 (gdb) print (void *)$v0
 $14 =3D (void *) 0x7853be20 <malloc_default+24>
 (gdb) x/xw $gp - 23368
 0x7868e728:     0xffff9008
 
 But:
 
 (gdb) print (void *)$v0
 $15 =3D (void *) 0x7853be20 <malloc_default+24>
 (gdb) print (void *)$v1
 $16 =3D (void *) 0x786b9008
 
 $v1 is right, but $v0 is completely bonkers -- and doesn't match what
 is at -23368(gp), which surely  lw v0,-23368(gp)  should have loaded!
 
 How does $v0 wind up with the address of the _rdhwr instruction_ when
 I run this _not_ under gdb???  Are we dumping core dumps wrong, hiding
 some underlying problem?
 
 --=_3GH/d8a4GiEuk6sY511/abKYWvHzDwXu
 Content-Type: text/plain; charset="ISO-8859-1"; name="rdhwr"
 Content-Transfer-Encoding: quoted-printable
 Content-Disposition: attachment; filename="rdhwr.c"
 
 #include <machine/lwp_private.h>
 
 #include <link_elf.h>
 #include <lwp.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
 /*
  * Thread-local variable with an initializer.  Nothing else in this
  * program reads it; presumably it is here so the executable itself
  * has TLS data -- TODO confirm.
  */
 __thread int foo =3D 123;
 
 /*
  * Static array handed to __lwp_settcb() in the disabled (#else)
  * branch of main(); in the enabled branch it is only named in
  * __USE().
  */
 struct tls_tcb tcb_storage[0x8000];
 
 /*
  * Obtain a TCB pointer with the rdhwr instruction rather than with
  * __lwp_gettcb_fast(), so that main() can print both results.
  *
  * The asm reads hardware register 29 into $3 and copies it into the
  * output operand.  The returned address is that value moved back by
  * (TLS_TP_OFFSET/sizeof(*tcb) + 1) elements of struct tls_tcb.
  *
  * NOTE(review): hardware register 29 is presumably the per-thread
  * pointer; nothing in this file states that directly -- confirm.
  * NOTE(review): the asm is not volatile and has no inputs, so the
  * compiler may treat repeated reads as identical.  That would only
  * matter if the disabled branch of main(), which calls
  * __lwp_settcb() between two reads, were re-enabled -- verify.
  */
 __noinline __used
 static struct tls_tcb *
 gettcb_rdhwr(void)
 {
 	struct tls_tcb *tcb;
 
 	/*
 	 * .set push/.set pop limit the mips32r2 setting to this
 	 * snippet.  $3 is in the clobber list because the rdhwr
 	 * below writes it by name.
 	 */
 	asm(".set push\n\t"
 	    ".set mips32r2\n\t"
 	    "rdhwr $3,$29\n\t"
 	    "move %[tcb],$3\n\t"
 	    ".set pop"
 	    : [tcb]"=3Dr"(tcb)
 	    :
 	    : "$3");
 
 	/* Step back from the value read to the address we report. */
 	return tcb - (TLS_TP_OFFSET/sizeof(*tcb) + 1);
 }
 
 /*
  * Callback passed to dl_iterate_phdr() by main().
  *
  * Formats the fields of *dlpi into a stack buffer and emits the
  * text on standard output with a single write() call.  The size and
  * cookie arguments are not used.
  *
  * Always returns 0.
  */
 static int
 dlitercb(struct dl_phdr_info *dlpi, size_t size, void *cookie)
 {
 	char buf[1024];
 
 	/* One "name, value" line per field, then a blank line. */
 	snprintf(buf, sizeof(buf),
 	    "dlpi_addr=3D0x%lx\n"
 	    "dlpi_name=3D%s\n"
 	    "dlpi_phdr=3D%p\n"
 	    "dlpi_phnum=3D%d\n"
 	    "dlpi_adds=3D%lld\n"
 	    "dlpi_subs=3D%lld\n"
 	    "dlpi_tls_modid=3D0x%zx\n"
 	    "dlpi_tls_data=3D%p\n"
 	    "\n",
 	    (long)dlpi->dlpi_addr,
 	    dlpi->dlpi_name,
 	    dlpi->dlpi_phdr,
 	    dlpi->dlpi_phnum,
 	    dlpi->dlpi_adds,
 	    dlpi->dlpi_subs,
 	    dlpi->dlpi_tls_modid,
 	    dlpi->dlpi_tls_data);
 	/* Output goes through write(), not stdio; result is ignored. */
 	(void)write(STDOUT_FILENO, buf, strlen(buf));
 
 	return 0;
 }
 
 /*
  * Test driver.  In order, it:
  *   1. prints every object reported by dl_iterate_phdr();
  *   2. prints the TCB pointer from __lwp_gettcb_fast();
  *   3. prints the TCB pointer from gettcb_rdhwr();
  *   4. prints the result of malloc(1) through printf().
  *
  * Steps 1 to 3 format into a stack buffer and use write() directly.
  * NOTE(review): presumably this keeps stdio, and any allocation it
  * might do, out of the picture until the malloc() in step 4 --
  * confirm.
  *
  * Always returns 0.
  */
 int
 main(void)
 {
 	char buf[1024];
 	struct tls_tcb *tcb_rdhwr, *tcb_syscall;
 
 	(void)dl_iterate_phdr(&dlitercb, NULL);
 
 	tcb_syscall =3D __lwp_gettcb_fast();
 	snprintf_ss(buf, sizeof(buf), "tcb_syscall %p\n", tcb_syscall);
 	(void)write(STDOUT_FILENO, buf, strlen(buf));
 
 	tcb_rdhwr =3D gettcb_rdhwr();
 	snprintf_ss(buf, sizeof(buf), "tcb_rdhwr %p\n", tcb_rdhwr);
 	(void)write(STDOUT_FILENO, buf, strlen(buf));
 
 	/* The allocation is never freed; the process exits right after. */
 	printf("%p\n", malloc(1));
 	fflush(stdout);
 
 #if 1
 	/* Enabled branch: only name the variables via __USE(). */
 	__USE(tcb_storage);
 	__USE(tcb_syscall);
 	__USE(tcb_rdhwr);
 	__USE(buf);
 #else
 	/*
 	 * Disabled branch: install tcb_storage with __lwp_settcb(),
 	 * then read and print the TCB pointer both ways again.
 	 */
 	__lwp_settcb(tcb_storage);
 
 	tcb_syscall =3D __lwp_gettcb_fast();
 	snprintf_ss(buf, sizeof(buf), "tcb_syscall %p\n", tcb_syscall);
 	(void)write(STDOUT_FILENO, buf, strlen(buf));
 
 	tcb_rdhwr =3D gettcb_rdhwr();
 	snprintf_ss(buf, sizeof(buf), "tcb_rdhwr %p\n", tcb_rdhwr);
 	(void)write(STDOUT_FILENO, buf, strlen(buf));
 #endif
 
 	return 0;
 }
 
 --=_3GH/d8a4GiEuk6sY511/abKYWvHzDwXu--
 


Home | Main Index | Thread Index | Old Index