Subject: bin/37347: ld.elf_so does not execute .init/.fini functions in order
To: None <gnats-admin@netbsd.org, netbsd-bugs@netbsd.org>
From: J.T. Conklin <jtc@acorntoolworks.com>
List: netbsd-bugs
Date: 11/09/2007 06:20:01
>Number:         37347
>Category:       bin
>Synopsis:       ld.elf_so does not execute .init/.fini functions in order
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    bin-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Fri Nov 09 06:20:01 +0000 2007
>Originator:     J.T. Conklin
>Release:        NetBSD 4.0_RC3
>Organization:
J.T. Conklin
>Environment:
System: NetBSD vm1.acorntoolworks.com 4.0_RC3 NetBSD 4.0_RC3 (GENERIC) #0: Tue Oct 16 01:14:06 PDT 2007  builds@wb34:/home/builds/ab/netbsd-4-0-RC3/i386/200710160011Z-obj/home/builds/ab/netbsd-4-0-RC3/src/sys/arch/i386/compile/GENERIC i386
Architecture: i386
Machine: i386

>Description:
As reported in recent messages to tech-userland, I encountered
problems with ACE (A C++ Library/Framework) and TAO (A CORBA ORB
implementation that uses ACE) because TAO's static constructors 
used the pthread library to create thread specific storage before 
the pthread library was initialized itself.

Further discussion revealed that ld.elf_so did not perform the
required (topological) sort of the objects before executing the
.init/.fini functions in each shared library.

>How-To-Repeat:

Since ACE/TAO are huge libraries, I wrote a unit test.  The program
"foobar" is linked with two shared libraries, "libfoo.so" and
"libbar.so".  "libbar.so" depends on "libfoo.so". To further
complicate things, "libfoo.so" dynamically loads "libtar.so" with
dlopen() in an .init function (freeing it in .fini), and "libtar.so"
depends on both "libbar.so" and "libfoo.so".  Finally, the main
program foobar dynamically loads "libdll.so", which depends on
"libdep1.so" and "libdep2.so", and "libdep1.so" depends on
"libdep2.so".

The above may be difficult to visualize.  Here is an ASCII diagram
that tries to show the dependencies.

  foobar ----+---------------+  
             |               |
  libtar.so -+-> libbar.so  -+-> libfoo.so
             |               |
             +---------------+

  libdll.so -+-> libdep1.so -+-> libdep2.so
             |               |
             +---------------+

Each of the above shared libraries' .init and .fini functions writes a
string to standard error, so you can see the order.

It should be:

$ ./foobar
foo_ctor
bar_ctor
tar_ctor
main_ctor
dep1 ctor
dep2 ctor
dll ctor
dll dtor
dep2 dtor
dep1 dtor
main_dtor
tar_dtor
bar_dtor
foo_dtor

While currently I get:

$ ./foobar
bar_ctor
foo_ctor
tar_ctor
main_ctor
dep1 ctor
dep2 ctor
dll ctor
dll dtor
dep2 dtor
dep1 dtor
main_dtor
foo_dtor
Invalid shared object handle 0xbdbed400
bar_dtor
tar_dtor

I tried this same test on FreeBSD 6.3, Solaris 10, and Ubuntu 7.10.
FreeBSD and Solaris seem to have had trouble with the dynamically
loaded libtar.so --- they both executed its .init function before
libbar.so's; but both got the destruction order correct.  The test
produced the expected results on Ubuntu.

# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#	Makefile
#	bar.c
#	dep1.c
#	dep2.c
#	dll.c
#	foo.c
#	main.c
#	tar.c
#
echo x - Makefile
sed 's/^X//' >Makefile << 'END-of-Makefile'
XCC=gcc
XCFLAGS=-O2 
XLDFLAGS=-L. -R. # -Wl,--dynamic-linker,/tmp/ld.elf_so
X
Xall:	barfoo foobar libdll.so libtar.so
X
Xclean:
X	rm -f barfoo foobar *.o *.so
X
Xfoobar: libfoo.so libbar.so
X
Xbarfoo: libfoo.so libbar.so main.o
X	$(CC) $(LDFLAGS) -o $@ main.o -lbar -lfoo
X
Xfoobar: libfoo.so libbar.so main.o
X	$(CC) $(LDFLAGS) -o $@ main.o -lfoo -lbar
X
Xmain.o: main.c
X	$(CC) $(CFLAGS) -o $@ -c main.c
X
Xfoo.o:	foo.c
X	$(CC) $(CFLAGS) -o $@ -fPIC -c foo.c
X
Xbar.o:	bar.c
X	$(CC) $(CFLAGS) -o $@ -fPIC -c bar.c
X
Xtar.o:	tar.c
X	$(CC) $(CFLAGS) -o $@ -fPIC -c tar.c
X
Xdll.o:	dll.c
X	$(CC) $(CFLAGS) -o $@ -fPIC -c dll.c
X
Xdep1.o:	dep1.c
X	$(CC) $(CFLAGS) -o $@ -fPIC -c dep1.c
X
Xdep2.o:	dep2.c
X	$(CC) $(CFLAGS) -o $@ -fPIC -c dep2.c
X
Xlibfoo.so: foo.o
X	$(CC) $(LDFLAGS) -o $@ -shared foo.o -lc
X
Xlibbar.so: bar.o libfoo.so
X	$(CC) $(LDFLAGS) -o $@ -shared bar.o -lfoo -lc
X
Xlibtar.so: tar.o libbar.so libfoo.so
X	$(CC) $(LDFLAGS) -o $@ -shared tar.o -lbar -lfoo -lc
X
Xlibdll.so: dll.o libdep1.so libdep2.so
X	$(CC) $(LDFLAGS) -o $@ -shared dll.o -ldep2 -ldep1 -lc
X
Xlibdep1.so: dep1.o libdep2.so
X	$(CC) $(LDFLAGS) -o $@ -shared dep1.o -ldep2 -lc
X
Xlibdep2.so: dep2.o
X	$(CC) $(LDFLAGS) -o $@ -shared dep2.o -lc
END-of-Makefile
echo x - bar.c
sed 's/^X//' >bar.c << 'END-of-bar.c'
X#include <unistd.h>
X
Xvoid bar_ctor() __attribute__((__constructor__));
Xvoid bar_dtor() __attribute__((__destructor__));
X
Xvoid
Xbar_ctor()
X{
X   write(2, "bar_ctor\n", 9);
X}
X
Xvoid
Xbar_dtor()
X{
X   write(2, "bar_dtor\n", 9);
X}
END-of-bar.c
echo x - dep1.c
sed 's/^X//' >dep1.c << 'END-of-dep1.c'
X#include <unistd.h>
X#include <dlfcn.h>
X
Xvoid *handle;
Xvoid dep1_ctor() __attribute__((__constructor__));
Xvoid dep1_dtor() __attribute__((__destructor__));
X
Xvoid
Xdep1_ctor()
X{
X   write(2, "dep1 ctor\n", 10);
X   handle = dlopen ("libfoo.so", RTLD_LAZY);
X}
X
Xvoid
Xdep1_dtor()
X{
X   write(2, "dep1 dtor\n", 10);
X   dlclose (handle);
X}
END-of-dep1.c
echo x - dep2.c
sed 's/^X//' >dep2.c << 'END-of-dep2.c'
X#include <unistd.h>
X
Xvoid dep_ctor() __attribute__((__constructor__));
Xvoid dep_dtor() __attribute__((__destructor__));
X
Xvoid
Xdep_ctor()
X{
X   write(2, "dep2 ctor\n", 10);
X}
X
Xvoid
Xdep_dtor()
X{
X   write(2, "dep2 dtor\n", 10);
X}
END-of-dep2.c
echo x - dll.c
sed 's/^X//' >dll.c << 'END-of-dll.c'
X#include <unistd.h>
X
Xvoid dll_ctor() __attribute__((__constructor__));
Xvoid dll_dtor() __attribute__((__destructor__));
X
Xvoid
Xdll_ctor()
X{
X   write(2, "dll ctor\n", 9);
X}
X
Xvoid
Xdll_dtor()
X{
X   write(2, "dll dtor\n", 9);
X}
END-of-dll.c
echo x - foo.c
sed 's/^X//' >foo.c << 'END-of-foo.c'
X#include <unistd.h>
X#include <dlfcn.h>
X
Xvoid *handle;
Xvoid foo_ctor() __attribute__((__constructor__));
Xvoid foo_dtor() __attribute__((__destructor__));
X
Xvoid
Xfoo_ctor()
X{
X   write(2, "foo_ctor\n", 9);
X   handle = dlopen("libtar.so", RTLD_LAZY);
X}
X
Xvoid
Xfoo_dtor()
X{
X   write(2, "foo_dtor\n", 9);
X   if (handle) 
X	dlclose(handle);
X}
END-of-foo.c
echo x - main.c
sed 's/^X//' >main.c << 'END-of-main.c'
X#include <unistd.h>
X#include <dlfcn.h>
X
Xvoid main_ctor() __attribute__((__constructor__));
Xvoid main_dtor() __attribute__((__destructor__));
X
Xvoid
Xmain_ctor()
X{
X   write(2, "main_ctor\n", 10);
X}
X
Xvoid
Xmain_dtor()
X{
X   write(2, "main_dtor\n", 10);
X}
X
Xint
Xmain()
X{
X   void *handle;
X
X   handle = dlopen("libdll.so", RTLD_LAZY);
X   dlclose(handle);
X
X   return 0;
X}
END-of-main.c
echo x - tar.c
sed 's/^X//' >tar.c << 'END-of-tar.c'
X#include <unistd.h>
X
Xvoid tar_ctor() __attribute__((__constructor__));
Xvoid tar_dtor() __attribute__((__destructor__));
X
Xvoid
Xtar_ctor()
X{
X   write(2, "tar_ctor\n", 9);
X}
X
Xvoid
Xtar_dtor()
X{
X   write(2, "tar_dtor\n", 9);
X}
END-of-tar.c
exit




>Fix:
The enclosed patch, relative to the netbsd-4 branch:

* replaces _rtld_objlist_add() with _rtld_objlist_push_head() and 
  _rtld_objlist_push_tail(). This allows the same sort routine to
  be used for both .init and .fini.

* adds _rtld_objlist_clear(), which removes and frees every element
  of an object list.

* changes _rtld_call_fini_functions() to create a sorted object list,
  invoke all .fini functions that have not been called, and dispose
  of the object list.  A "force" parameter is used to indicate process
  exit; otherwise .fini is called only for those objects with a
  zero reference count.

* changes _rtld_call_init_functions() to create a sorted object list,
  invoke all .init functions that have not been called, and dispose
  of the object list.

* adds "init_done", "init_called", and "fini_called" flags to the
  Obj_Entry structure.  The first is used to mark an object visited
  during the tsort.  The others are used to ensure the .init and/or
  .fini routines are not called more than once, especially when a
  shared library is loaded/unloaded by a .init/.fini routine in
  another shared library.

* adds _rtld_initlist_visit() and _rtld_initlist_tsort() functions 
  to perform the topological sort using a recursive depth first search.

Index: load.c
===================================================================
RCS file: /cvsroot/src/libexec/ld.elf_so/load.c,v
retrieving revision 1.31
diff -u -r1.31 load.c
--- load.c	21 Mar 2006 17:48:10 -0000	1.31
+++ load.c	9 Nov 2007 05:10:59 -0000
@@ -68,9 +68,19 @@
   SIMPLEQ_HEAD_INITIALIZER(_rtld_list_main);
 Objlist _rtld_list_global =	/* Objects dlopened with RTLD_GLOBAL */
   SIMPLEQ_HEAD_INITIALIZER(_rtld_list_global);
+  
+void
+_rtld_objlist_push_head(Objlist *list, Obj_Entry *obj)
+{
+	Objlist_Entry *elm;
+
+	elm = NEW(Objlist_Entry);
+	elm->obj = obj;
+	SIMPLEQ_INSERT_HEAD(list, elm, link);
+}
 
 void
-_rtld_objlist_add(Objlist *list, Obj_Entry *obj)
+_rtld_objlist_push_tail(Objlist *list, Obj_Entry *obj)
 {
 	Objlist_Entry *elm;
 
@@ -159,12 +169,12 @@
 	if (mode & RTLD_MAIN && !obj->mainref) {
 		obj->mainref = 1;
 		rdbg(("adding %p (%s) to _rtld_list_main", obj, obj->path));
-		_rtld_objlist_add(&_rtld_list_main, obj);
+		_rtld_objlist_push_tail(&_rtld_list_main, obj);
 	}
 	if (mode & RTLD_GLOBAL && !obj->globalref) {
 		obj->globalref = 1;
 		rdbg(("adding %p (%s) to _rtld_list_global", obj, obj->path));
-		_rtld_objlist_add(&_rtld_list_global, obj);
+		_rtld_objlist_push_tail(&_rtld_list_global, obj);
 	}
 #endif
 	return obj;
Index: rtld.c
===================================================================
RCS file: /cvsroot/src/libexec/ld.elf_so/rtld.c,v
retrieving revision 1.111.2.1
diff -u -r1.111.2.1 rtld.c
--- rtld.c	19 Jul 2007 14:38:16 -0000	1.111.2.1
+++ rtld.c	9 Nov 2007 05:10:59 -0000
@@ -109,35 +109,64 @@
 #endif /* RTLD_DEBUG */
 extern Elf_Dyn  _DYNAMIC;
 
-static void _rtld_call_fini_functions(Obj_Entry *);
-static void _rtld_call_init_functions(Obj_Entry *);
+static void _rtld_call_fini_functions(int);
+static void _rtld_call_init_functions(void);
+static void _rtld_initlist_visit(Objlist *, Obj_Entry *, int);
+static void _rtld_initlist_tsort(Objlist *, int);
 static Obj_Entry *_rtld_dlcheck(void *);
 static void _rtld_init_dag(Obj_Entry *);
 static void _rtld_init_dag1(Obj_Entry *, Obj_Entry *);
 static void _rtld_objlist_remove(Objlist *, Obj_Entry *);
+static void _rtld_objlist_clear(Objlist *);
 static void _rtld_unload_object(Obj_Entry *, bool);
 static void _rtld_unref_dag(Obj_Entry *);
 static Obj_Entry *_rtld_obj_from_addr(const void *);
 
 static void
-_rtld_call_fini_functions(Obj_Entry *first)
+_rtld_call_fini_functions(int force)
 {
-	Obj_Entry *obj;
+	Objlist_Entry *elm;
+	Objlist finilist;
+
+	dbg(("_rtld_call_fini_functions(%d)", force));
 
-	for (obj = first; obj != NULL; obj = obj->next)
-		if (obj->fini != NULL)
+	SIMPLEQ_INIT(&finilist);
+	_rtld_initlist_tsort(&finilist, 1);
+
+	SIMPLEQ_FOREACH(elm, &finilist, link) {
+		Obj_Entry *obj = elm->obj;
+		if ((force || obj->refcount == 0) && obj->fini != NULL && !obj->fini_called) {
+			dbg (("calling fini function %s at %p", 
+			      obj->path, (void *) obj->fini));
+			obj->fini_called = 1;
 			(*obj->fini)();
+		}
+	}
+
+        _rtld_objlist_clear(&finilist);
 }
 
 static void
-_rtld_call_init_functions(Obj_Entry *first)
+_rtld_call_init_functions()
 {
+	Objlist_Entry *elm;
+	Objlist initlist;
 
-	if (first != NULL) {
-		_rtld_call_init_functions(first->next);
-		if (first->init != NULL)
-			(*first->init)();
+	dbg(("_rtld_call_init_functions()"));
+	SIMPLEQ_INIT(&initlist);
+	_rtld_initlist_tsort(&initlist, 0);
+
+	SIMPLEQ_FOREACH(elm, &initlist, link) {
+		Obj_Entry *obj = elm->obj;
+		if (obj->init != NULL && !obj->init_called) {
+			dbg (("calling init function %s at %p", 
+			     obj->path, (void *) obj->init));
+			obj->init_called = 1;
+			(*obj->init)();
+		}
 	}
+
+        _rtld_objlist_clear(&initlist);
 }
 
 /*
@@ -196,10 +225,9 @@
 static void
 _rtld_exit(void)
 {
-
 	dbg(("rtld_exit()"));
 
-	_rtld_call_fini_functions(_rtld_objlist->next);
+	_rtld_call_fini_functions(1);
 }
 
 /*
@@ -408,7 +436,7 @@
 
 	++_rtld_objmain->refcount;
 	_rtld_objmain->mainref = 1;
-	_rtld_objlist_add(&_rtld_list_main, _rtld_objmain);
+	_rtld_objlist_push_tail(&_rtld_list_main, _rtld_objmain);
 
 	/* Initialize a fake symbol for resolving undefined weak references. */
 	_rtld_sym_zero.st_info = ELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
@@ -463,7 +491,7 @@
 		*real___mainprog_obj = _rtld_objmain;
 
 	dbg(("calling _init functions"));
-	_rtld_call_init_functions(_rtld_objmain->next);
+	_rtld_call_init_functions();
 
 	dbg(("control at program entry point = %p, obj = %p, exit = %p",
 	     _rtld_objmain->entry, _rtld_objmain, _rtld_exit));
@@ -507,6 +535,46 @@
 }
 
 static void
+_rtld_initlist_visit(Objlist* list, Obj_Entry *obj, int rev)
+{
+	Needed_Entry* elm;
+
+//	dbg(("_rtld_initlist_visit(%s)", obj->path));
+
+	if (obj->init_done)
+		return;
+	obj->init_done = 1;
+
+	for (elm = obj->needed; elm != NULL; elm = elm->next) {
+		if (elm->obj != NULL) {
+			_rtld_initlist_visit(list, elm->obj, rev);
+		}
+	}
+
+	if (rev) {
+		_rtld_objlist_push_head(list, obj);
+	} else {
+		_rtld_objlist_push_tail(list, obj);
+	}
+}
+
+static void
+_rtld_initlist_tsort(Objlist* list, int rev)
+{
+	dbg(("_rtld_initlist_tsort"));
+
+	Obj_Entry* obj;
+
+	for (obj = _rtld_objlist->next; obj; obj = obj->next) {
+		obj->init_done = 0;
+	}
+
+	for (obj = _rtld_objlist->next; obj; obj = obj->next) {
+		_rtld_initlist_visit(list, obj, rev);
+	}
+}
+
+static void
 _rtld_init_dag(Obj_Entry *root)
 {
 
@@ -523,8 +591,8 @@
 			return;
 		rdbg(("add %p (%s) to %p (%s) DAG", obj, obj->path, root,
 		    root->path));
-		_rtld_objlist_add(&obj->dldags, root);
-		_rtld_objlist_add(&root->dagmembers, obj);
+		_rtld_objlist_push_tail(&obj->dldags, root);
+		_rtld_objlist_push_tail(&root->dagmembers, obj);
 	}
 	for (needed = obj->needed; needed != NULL; needed = needed->next)
 		if (needed->obj != NULL)
@@ -546,9 +614,7 @@
 
 		/* Finalize objects that are about to be unmapped. */
 		if (do_fini_funcs)
-			for (obj = _rtld_objlist->next;  obj != NULL;  obj = obj->next)
-				if (obj->refcount == 0 && obj->fini != NULL)
-					(*obj->fini)();
+			_rtld_call_fini_functions(0);
 
 		/* Remove the DAG from all objects' DAG lists. */
 		SIMPLEQ_FOREACH(elm, &root->dagmembers, link)
@@ -656,8 +722,9 @@
 				_rtld_unload_object(obj, false);
 				obj->dl_refcount--;
 				obj = NULL;
-			} else
-				_rtld_call_init_functions(obj);
+			} else {
+				_rtld_call_init_functions();
+			}
 		}
 	}
 	_rtld_debug.r_state = RT_CONSISTENT;
@@ -935,3 +1002,13 @@
 		free(elm);
 	}
 }
+
+static void
+_rtld_objlist_clear(Objlist *list)
+{
+	while (!SIMPLEQ_EMPTY(list)) {
+		Objlist_Entry* elm = SIMPLEQ_FIRST(list);
+		SIMPLEQ_REMOVE_HEAD(list, link);
+		free(elm);
+	}
+}
Index: rtld.h
===================================================================
RCS file: /cvsroot/src/libexec/ld.elf_so/rtld.h,v
retrieving revision 1.73.4.1
diff -u -r1.73.4.1 rtld.h
--- rtld.h	27 Sep 2007 13:44:48 -0000	1.73.4.1
+++ rtld.h	9 Nov 2007 05:10:59 -0000
@@ -177,7 +177,12 @@
 			printed:1,	/* True if ldd has printed it */
 			isdynamic:1,	/* True if this is a pure PIC object */
 			mainref:1,	/* True if on _rtld_list_main */
-			globalref:1;	/* True if on _rtld_list_global */
+			globalref:1,	/* True if on _rtld_list_global */
+			init_done:1,	/* True if .init has been added */
+			init_called:1,	/* True if .init function has been 
+					 * called */
+			fini_called:1;	/* True if .fini function has been 
+					 * called */
 
 	struct link_map linkmap;	/* for GDB */
 
@@ -222,7 +227,8 @@
 void _rtld_debug_state(void);
 void _rtld_linkmap_add(Obj_Entry *);
 void _rtld_linkmap_delete(Obj_Entry *);
-void _rtld_objlist_add(Objlist *, Obj_Entry *);
+void _rtld_objlist_push_head(Objlist *, Obj_Entry *);
+void _rtld_objlist_push_tail(Objlist *, Obj_Entry *);
 Objlist_Entry *_rtld_objlist_find(Objlist *, const Obj_Entry *);
 
 /* headers.c */