Subject: mips gcc4 ld.elf_so problems
To: None <tech-toolchain@netbsd.org>
From: Simon Burge <simonb@NetBSD.org>
List: tech-toolchain
Date: 06/27/2006 23:02:48
ld.elf_so is having alignment issues with mips and gcc4 C++ programs.
The core of the problem is that we have a function store_ptr() which is

	static inline void
	store_ptr(void *where, Elf_Addr val)
	{
		memcpy(where, &val, sizeof(val));
	}

The idea of this is that we sometimes want to store a Elf_Addr
(uint32_t) at an unaligned address, so we use memcpy to write the data.
This was added for gcc3 which started using aligned data for some GOT
info.

The code that calls this does:

	Elf_Addr *where;

	where = ....
	store_ptr(where, value);

With gcc3 this generated code like:

        swl     $2,3($8)
        swr     $2,0($8)

and with gcc4 this generates:

        sw      $2,0($9)

That is, with gcc4 it's an aligned store instead of an unaligned store.

Just adding casts at various places didn't help, both at the caller
and inside store_ptr() didn't help.  The patch below works around this
by starting with a void * variable, which obviously doesn't have any
alignment constraints.

Now, the question is "is what gcc4 is doing right?".  Since we started
with a pointer to Elf_Addr, is the optimiser legal in saying that that
pointer is always aligned so it an optimise the store in to an aligned
store?  Or is it a bug with gcc4 and memcpy and assuming alignment?

Cheers,
Simon.
--

Index: libexec/ld.elf_so/arch/mips/mips_reloc.c
===================================================================
RCS file: /cvsroot/src/libexec/ld.elf_so/arch/mips/mips_reloc.c,v
retrieving revision 1.51
diff -d -p -u -r1.51 mips_reloc.c
--- libexec/ld.elf_so/arch/mips/mips_reloc.c	3 Apr 2006 13:23:15 -0000	1.51
+++ libexec/ld.elf_so/arch/mips/mips_reloc.c	27 Jun 2006 05:20:55 -0000
@@ -86,6 +86,7 @@ _rtld_relocate_nonplt_self(Elf_Dyn *dynp
 {
 	const Elf_Rel *rel = 0, *rellim;
 	Elf_Addr relsz = 0;
+	void *wherev;
 	Elf_Addr *where;
 	const Elf_Sym *symtab = NULL, *sym;
 	Elf_Addr *got = NULL;
@@ -133,7 +134,8 @@ _rtld_relocate_nonplt_self(Elf_Dyn *dynp
 
 	rellim = (const Elf_Rel *)((caddr_t)rel + relsz);
 	for (; rel < rellim; rel++) {
-		where = (Elf_Addr *)(relocbase + rel->r_offset);
+		wherev = (void *)(relocbase + rel->r_offset);
+		where = wherev;
 
 		switch (ELF_R_TYPE(rel->r_info)) {
 		case R_TYPE(NONE):
@@ -146,7 +148,7 @@ _rtld_relocate_nonplt_self(Elf_Dyn *dynp
 			if (__predict_true(RELOC_ALIGNED_P(where)))
 				*where += relocbase;
 			else
-				store_ptr(where, load_ptr(where) + relocbase);
+				store_ptr(wherev, load_ptr(wherev) + relocbase);
 			break;
 
 		default:
@@ -246,9 +248,12 @@ _rtld_relocate_nonplt_objects(const Obj_
 	got = obj->pltgot;
 	for (rel = obj->rel; rel < obj->rellim; rel++) {
 		Elf_Addr        *where, tmp;
+		void		*wherev;
 		unsigned long	 symnum;
 
-		where = (Elf_Addr *)(obj->relocbase + rel->r_offset);
+		/* store address in a non-alignment restricted pointer first */
+		wherev = obj->relocbase + rel->r_offset;
+		where = wherev;
 		symnum = ELF_R_SYM(rel->r_info);
 
 		switch (ELF_R_TYPE(rel->r_info)) {
@@ -263,12 +268,12 @@ _rtld_relocate_nonplt_objects(const Obj_
 				if (__predict_true(RELOC_ALIGNED_P(where)))
 					tmp = *where;
 				else
-					tmp = load_ptr(where);
+					tmp = load_ptr(wherev);
 				tmp += got[obj->local_gotno + symnum - obj->gotsym];
 				if (__predict_true(RELOC_ALIGNED_P(where)))
 					*where = tmp;
 				else
-					store_ptr(where, tmp);
+					store_ptr(wherev, tmp);
 
 				rdbg(("REL32/G %s in %s --> %p in %s",
 				    obj->strtab + def->st_name, obj->path,
@@ -293,7 +298,7 @@ _rtld_relocate_nonplt_objects(const Obj_
 				if (__predict_true(RELOC_ALIGNED_P(where)))
 					tmp = *where;
 				else
-					tmp = load_ptr(where);
+					tmp = load_ptr(wherev);
 
 				if (def->st_info ==
 				    ELF_ST_INFO(STB_LOCAL, STT_SECTION)
@@ -307,7 +312,7 @@ _rtld_relocate_nonplt_objects(const Obj_
 				if (__predict_true(RELOC_ALIGNED_P(where)))
 					*where = tmp;
 				else
-					store_ptr(where, tmp);
+					store_ptr(wherev, tmp);
 
 				rdbg(("REL32/L %s in %s --> %p in %s",
 				    obj->strtab + def->st_name, obj->path,
@@ -319,7 +324,7 @@ _rtld_relocate_nonplt_objects(const Obj_
 			rdbg(("sym = %lu, type = %lu, offset = %p, "
 			    "contents = %p, symbol = %s",
 			    symnum, (u_long)ELF_R_TYPE(rel->r_info),
-			    (void *)rel->r_offset, (void *)load_ptr(where),
+			    (void *)rel->r_offset, (void *)load_ptr(wherev),
 			    obj->strtab + obj->symtab[symnum].st_name));
 			_rtld_error("%s: Unsupported relocation type %ld "
 			    "in non-PLT relocations\n",