Subject: lib/24324: telldir issues
To: None <gnats-bugs@gnats.NetBSD.org>
From: None <arnej@europe.yahoo-inc.com>
List: netbsd-bugs
Date: 02/05/2004 20:10:27
>Number: 24324
>Category: lib
>Synopsis: telldir has several issues
>Confidential: no
>Severity: non-critical
>Priority: medium
>Responsible: lib-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Thu Feb 05 20:11:00 UTC 2004
>Closed-Date:
>Last-Modified:
>Originator: Arne H Juul
>Release: NetBSD 1.6ZH
>Organization:
none
>Environment:
System: NetBSD usikkert.trondheim.corp.yahoo.com 1.6ZH NetBSD 1.6ZH (USIKKERT) #0: Tue Jan 20 19:46:18 UTC 2004 arnej@usikkert.trondheim.corp.yahoo.com:/usr/obj/sys/arch/i386/compile/USIKKERT i386
Architecture: i386
Machine: i386
>Description:
After debugging a threading crash originally seen on FreeBSD-stable
I decided to check if NetBSD had similar issues, and found that it
did. The original bug is a threading problem that can cause programs
to crash if several threads open/close directories simultaneously,
if they both manipulate the same linked list in the global
dd_hash table. Actually NetBSD has a mutex around these regions
already but that mutex is per-directory while the hash is global.
This wouldn't really be a problem since virtually no programs
use telldir()/seekdir(), but rewinddir() is implemented using
an implicit telldir() in opendir() meaning that every open
directory will use one entry in some slot in the hash table.
There are also two other issues that probably doesn't
matter at all in practice:
telldir.c has a SINGLEUSE defined that means a location returned
from telldir() should be used once and exactly once as argument to
seekdir(). If the location is not used, memory will leak.
On the other hand, if it is used it will be lost and repeated
use will become a no-op, breaking the seekdir() API.
Also, if one passes a location to seekdir() and then immediately
does a telldir() afterwards, the standards say one should get
the "same directory position" arguable meaning the same return
value.
Eliminating the hash totally and making a linked list in each
directory seems to be the logical solution, and I've written
a patch to do that. This patch fixes all the issues above
since now we can do cleanup from closedir(), eliminating the
SINGLEUSE, and we also traverse the list on telldir() to check
if we could return an earlier cookie. This also bounds the
entries in the list to {entries in the directory}+1 in case
somebody wants to use telldir() a lot.
>How-To-Repeat:
Here's a test program to detect memory leak, and
one to run seekdir() to the same location twice and
print the resulting next directory entries found. It
also prints the value of an immediate telldir() after seekdir().
# This is a shell archive. Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".
# This archive contains:
#
# telldir-memuse.c
# seek-twice.c
#
echo x - telldir-memuse.c
sed 's/^X//' >telldir-memuse.c << 'END-of-telldir-memuse.c'
X#include <stdlib.h>
X#include <unistd.h>
X#include <dirent.h>
X#include <err.h>
X
X
Xint main(void) {
X DIR *dp;
X long loc;
X void *memused;
X int i;
X int oktouse = 4096;
X
X dp = opendir(".");
X if (dp == NULL)
X err(EXIT_FAILURE, "Could not open current directory");
X loc = telldir(dp);
X memused = sbrk(0);
X closedir(dp);
X
X for (i=0; i<1000; i++) {
X dp = opendir(".");
X if (dp == NULL)
X err(EXIT_FAILURE, "Could not open current directory");
X loc = telldir(dp);
X closedir(dp);
X
X if (sbrk(0) - memused > oktouse) {
X warnx("Used %d extra bytes for %d telldir calls",
X (sbrk(0)-memused), i);
X oktouse = sbrk(0) - memused;
X }
X }
X if (oktouse > 4096) {
X errx(EXIT_FAILURE, "Failure: leaked %d bytes", oktouse);
X } else {
X printf("OK: used %d bytes\n", sbrk(0)-memused);
X }
X return 0;
X}
END-of-telldir-memuse.c
echo x - seek-twice.c
sed 's/^X//' >seek-twice.c << 'END-of-seek-twice.c'
X#include <stdio.h>
X#include <string.h>
X#include <dirent.h>
X#include <sys/stat.h>
X
Xint main(void) {
X DIR *dp;
X
X mkdir("t", 0755);
X creat("t/a", 0600);
X creat("t/b", 0600);
X creat("t/c", 0600);
X
X if (dp = opendir("t")) {
X char *wasname;
X struct dirent *entry;
X long here;
X
X /* skip two */
X entry = readdir(dp);
X entry = readdir(dp);
X entry = readdir(dp);
X printf("first found: %s\n", entry->d_name);
X
X here = telldir(dp);
X printf("pos returned by telldir: %d\n", here);
X entry = readdir(dp);
X printf("second found: %s\n", entry->d_name);
X
X wasname = strdup(entry->d_name);
X entry = readdir(dp);
X printf("third found: %s\n", entry->d_name);
X
X seekdir(dp, here);
X entry = readdir(dp);
X printf("should be == second: %s\n", entry->d_name);
X
X printf("pos given to seekdir: %d\n", here);
X seekdir(dp, here);
X here = telldir(dp);
X printf("pos returned by telldir: %d\n", here);
X
X entry = readdir(dp);
X printf("should be == second: %s\n", entry->d_name);
X
X seekdir(dp, here);
X entry = readdir(dp);
X printf("should be == second: %s\n", entry->d_name);
X
X closedir(dp);
X }
X return 0;
X}
END-of-seek-twice.c
exit
>Fix:
proposed patch:
Index: include/dirent.h
===================================================================
RCS file: /usr/cvs/src/include/dirent.h,v
retrieving revision 1.21
diff -u -r1.21 dirent.h
--- include/dirent.h 7 Aug 2003 09:44:10 -0000 1.21
+++ include/dirent.h 5 Feb 2004 19:13:44 -0000
@@ -54,6 +54,19 @@
/* definitions for library routines operating on directories. */
#define DIRBLKSIZ 1024
+/*
+ * One struct _dirpos is malloced to describe the current directory
+ * position each time telldir is called. It records the current magic
+ * cookie returned by getdirentries and the offset within the buffer
+ * associated with that return value.
+ */
+struct _dirpos {
+ struct _dirpos *_dpnxt;/* next structure in list */
+ long _dpcookie; /* key associated with structure */
+ off_t _dpseek; /* magic cookie returned by getdirentries */
+ long _dploc; /* offset of entry in buffer */
+};
+
/* structure describing an open directory. */
struct _dirdesc {
int dd_fd; /* file descriptor associated with directory */
@@ -65,6 +78,7 @@
long dd_rewind; /* magic cookie for rewinding */
int dd_flags; /* flags for readdir */
void *dd_lock; /* lock for concurrent access */
+ struct _dirpos *dd_plist; /* linked list of telldir cookies */
};
#define dirfd(dirp) ((dirp)->dd_fd)
@@ -92,7 +106,7 @@
void rewinddir __P((DIR *));
#if defined(_XOPEN_SOURCE) || defined(_NETBSD_SOURCE)
void seekdir __P((DIR *, long));
-long telldir __P((const DIR *));
+long telldir __P((DIR *));
#endif /* defined(_NETBSD_SOURCE) || defined(_XOPEN_SOURCE) */
#if defined(_NETBSD_SOURCE)
DIR *__opendir2 __P((const char *, int));
Index: lib/libc/gen/closedir.c
===================================================================
RCS file: /usr/cvs/src/lib/libc/gen/closedir.c,v
retrieving revision 1.13
diff -u -r1.13 closedir.c
--- lib/libc/gen/closedir.c 7 Aug 2003 16:42:46 -0000 1.13
+++ lib/libc/gen/closedir.c 5 Feb 2004 19:10:16 -0000
@@ -60,6 +60,7 @@
DIR *dirp;
{
int fd;
+ struct _dirpos *poslist;
_DIAGASSERT(dirp != NULL);
@@ -67,7 +68,14 @@
if (__isthreaded)
mutex_lock((mutex_t *)dirp->dd_lock);
#endif
- __seekdir(dirp, dirp->dd_rewind); /* free seekdir storage */
+ /* free seekdir storage */
+ poslist = dirp->dd_plist;
+ while (poslist != NULL) {
+ struct _dirpos *nextpos = poslist->_dpnxt;
+ free(poslist);
+ poslist = nextpos;
+ }
+ dirp->dd_plist = NULL;
fd = dirp->dd_fd;
dirp->dd_fd = -1;
dirp->dd_loc = 0;
Index: lib/libc/gen/directory.3
===================================================================
RCS file: /usr/cvs/src/lib/libc/gen/directory.3,v
retrieving revision 1.24
diff -u -r1.24 directory.3
--- lib/libc/gen/directory.3 7 Aug 2003 16:42:47 -0000 1.24
+++ lib/libc/gen/directory.3 28 Jan 2004 17:23:18 -0000
@@ -53,7 +53,7 @@
.Ft int
.Fn readdir_r "DIR * restrict dirp" "struct dirent * restrict entry" "struct dirent ** restrict result"
.Ft long
-.Fn telldir "const DIR *dirp"
+.Fn telldir "DIR *dirp"
.Ft void
.Fn seekdir "DIR *dirp" "long loc"
.Ft void
Index: lib/libc/gen/opendir.c
===================================================================
RCS file: /usr/cvs/src/lib/libc/gen/opendir.c,v
retrieving revision 1.24
diff -u -r1.24 opendir.c
--- lib/libc/gen/opendir.c 7 Aug 2003 16:42:55 -0000 1.24
+++ lib/libc/gen/opendir.c 28 Jan 2004 13:06:08 -0000
@@ -308,6 +308,7 @@
mutex_init((mutex_t *)dirp->dd_lock, NULL);
}
#endif
+ dirp->dd_plist = NULL;
dirp->dd_rewind = telldir(dirp);
return (dirp);
error:
Index: lib/libc/gen/rewinddir.c
===================================================================
RCS file: /usr/cvs/src/lib/libc/gen/rewinddir.c,v
retrieving revision 1.10
diff -u -r1.10 rewinddir.c
--- lib/libc/gen/rewinddir.c 7 Aug 2003 16:42:56 -0000 1.10
+++ lib/libc/gen/rewinddir.c 28 Jan 2004 20:57:26 -0000
@@ -53,6 +53,6 @@
{
- __seekdir(dirp, dirp->dd_rewind);
+ seekdir(dirp, dirp->dd_rewind);
dirp->dd_rewind = telldir(dirp);
}
Index: lib/libc/gen/seekdir.c
===================================================================
RCS file: /usr/cvs/src/lib/libc/gen/seekdir.c,v
retrieving revision 1.11
diff -u -r1.11 seekdir.c
--- lib/libc/gen/seekdir.c 7 Aug 2003 16:42:56 -0000 1.11
+++ lib/libc/gen/seekdir.c 5 Feb 2004 19:23:16 -0000
@@ -42,7 +42,10 @@
#include "reentrant.h"
#include <sys/param.h>
+#include <assert.h>
#include <dirent.h>
+#include <stdlib.h>
+#include <unistd.h>
#ifdef __weak_alias
__weak_alias(seekdir,_seekdir)
@@ -50,21 +53,39 @@
/*
* Seek to an entry in a directory.
- * __seekdir is in telldir.c so that it can share opaque data structures.
+ * Only values returned by "telldir" should be passed to seekdir.
*/
void
seekdir(dirp, loc)
DIR *dirp;
long loc;
{
+ struct _dirpos *lp;
+ struct dirent *dp;
#ifdef _REENTRANT
if (__isthreaded)
mutex_lock((mutex_t *)dirp->dd_lock);
#endif
+ _DIAGASSERT(dirp != NULL);
- __seekdir(dirp, loc);
-
+ lp = dirp->dd_plist;
+ while (lp != NULL) {
+ if (lp->_dpcookie == loc)
+ break;
+ lp = lp->_dpnxt;
+ }
+ if (lp == NULL)
+ return;
+ if (lp->_dploc == dirp->dd_loc && lp->_dpseek == dirp->dd_seek)
+ return;
+ dirp->dd_seek = lseek(dirp->dd_fd, (off_t)lp->_dpseek, SEEK_SET);
+ dirp->dd_loc = 0;
+ while (dirp->dd_loc < lp->_dploc) {
+ dp = readdir(dirp);
+ if (dp == NULL)
+ break;
+ }
#ifdef _REENTRANT
if (__isthreaded)
mutex_unlock((mutex_t *)dirp->dd_lock);
Index: lib/libc/gen/telldir.c
===================================================================
RCS file: /usr/cvs/src/lib/libc/gen/telldir.c,v
retrieving revision 1.15
diff -u -r1.15 telldir.c
--- lib/libc/gen/telldir.c 7 Aug 2003 16:42:58 -0000 1.15
+++ lib/libc/gen/telldir.c 5 Feb 2004 19:11:24 -0000
@@ -52,99 +52,43 @@
#endif
/*
- * The option SINGLEUSE may be defined to say that a telldir
- * cookie may be used only once before it is freed. This option
- * is used to avoid having memory usage grow without bound.
- */
-#define SINGLEUSE
-
-/*
- * One of these structures is malloced to describe the current directory
- * position each time telldir is called. It records the current magic
- * cookie returned by getdirentries and the offset within the buffer
- * associated with that return value.
- */
-struct ddloc {
- struct ddloc *loc_next;/* next structure in list */
- long loc_index; /* key associated with structure */
- off_t loc_seek; /* magic cookie returned by getdirentries */
- long loc_loc; /* offset of entry in buffer */
-};
-
-#define NDIRHASH 32 /* Num of hash lists, must be a power of 2 */
-#define LOCHASH(i) (((int)i)&(NDIRHASH-1))
-
-static long dd_loccnt; /* Index of entry for sequential readdir's */
-static struct ddloc *dd_hash[NDIRHASH]; /* Hash list heads for ddlocs */
-
-/*
* return a pointer into a directory
*/
long
telldir(dirp)
- const DIR *dirp;
+ DIR *dirp;
{
- long idx;
- struct ddloc *lp;
+ long idx = 0;
+ struct _dirpos *lp;
#ifdef _REENTRANT
if (__isthreaded)
mutex_lock((mutex_t *)dirp->dd_lock);
#endif
+ lp = dirp->dd_plist;
+ while (lp != NULL) {
+ if (lp->_dpseek == dirp->dd_seek &&
+ lp->_dploc == dirp->dd_loc)
+ {
+ /* made before, reuse */
+ return lp->_dpcookie;
+ }
+ if (idx < lp->_dpcookie)
+ idx = lp->_dpcookie;
+ lp = lp->_dpnxt;
+ }
- if ((lp = (struct ddloc *)malloc(sizeof(struct ddloc))) == NULL)
+ if ((lp = (struct _dirpos *)malloc(sizeof(struct _dirpos))) == NULL)
return (-1);
- idx = dd_loccnt++;
- lp->loc_index = idx;
- lp->loc_seek = dirp->dd_seek;
- lp->loc_loc = dirp->dd_loc;
- lp->loc_next = dd_hash[LOCHASH(idx)];
- dd_hash[LOCHASH(idx)] = lp;
+ lp->_dpcookie = idx+1;
+ lp->_dpseek = dirp->dd_seek;
+ lp->_dploc = dirp->dd_loc;
+ lp->_dpnxt = dirp->dd_plist;
+ dirp->dd_plist = lp;
+
#ifdef _REENTRANT
if (__isthreaded)
mutex_unlock((mutex_t *)dirp->dd_lock);
#endif
- return (idx);
-}
-
-/*
- * seek to an entry in a directory.
- * Only values returned by "telldir" should be passed to seekdir.
- */
-void
-__seekdir(dirp, loc)
- DIR *dirp;
- long loc;
-{
- struct ddloc *lp;
- struct ddloc **prevlp;
- struct dirent *dp;
-
- _DIAGASSERT(dirp != NULL);
-
- prevlp = &dd_hash[LOCHASH(loc)];
- lp = *prevlp;
- while (lp != NULL) {
- if (lp->loc_index == loc)
- break;
- prevlp = &lp->loc_next;
- lp = lp->loc_next;
- }
- if (lp == NULL)
- return;
- if (lp->loc_loc == dirp->dd_loc && lp->loc_seek == dirp->dd_seek)
- goto found;
- (void) lseek(dirp->dd_fd, (off_t)lp->loc_seek, SEEK_SET);
- dirp->dd_seek = lp->loc_seek;
- dirp->dd_loc = 0;
- while (dirp->dd_loc < lp->loc_loc) {
- dp = readdir(dirp);
- if (dp == NULL)
- break;
- }
-found:
-#ifdef SINGLEUSE
- *prevlp = lp->loc_next;
- free(lp);
-#endif
+ return (lp->_dpcookie);
}
>Release-Note:
>Audit-Trail:
>Unformatted:
-current as of 2004-02-01