Subject: bin/19387: fix for error in pwd_mkdb error reporting, plus some 25% more performance enhancements
To: None <gnats-bugs@gnats.netbsd.org>
From: Greg A. Woods <woods@weird.com>
List: netbsd-bugs
Date: 12/15/2002 00:06:05
>Number:         19387
>Category:       bin
>Synopsis:       fix for error in pwd_mkdb error reporting, plus some 25% more performance enhancements
>Confidential:   no
>Severity:       serious
>Priority:       high
>Responsible:    bin-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Sat Dec 14 21:07:00 PST 2002
>Closed-Date:
>Last-Modified:
>Originator:     Greg A. Woods
>Release:        2002-12/12
>Organization:
Planix, Inc.; Toronto, Ontario; Canada
>Environment:
System: NetBSD proven 1.5W NetBSD 1.5W (PROVEN) #1: Sat Aug 25 21:25:26 EDT 2001 woods@proven:/work/woods/NetBSD-src/sys/arch/i386/compile/PROVEN i386
Architecture: i386
Machine: i386
>Description:

	as has been discussed on the various lists several times over
	the past half decade, pwd_mkdb(8) has traditionally had very bad
	performance, especially for rebuilding large passwd files, and
	this is apparently due to the very poor tuning of the hash(3)
	parameters used to create the .db files.

	some attempt was made to do some minor tuning of pwd_mkdb a bit
	over a year ago, but apparently without much real thought to
	even the general nature of hash-bucket databases (and not even
	all the suggestions from the lists were applied verbatim).

	the changes below cut runtime on my P-Pro with slow IDE drives
	for an ~11,000 entry passwd file to approximately just 70% of
	the plain 8MB cachesize version (and to just 10% of the original
	2MB cachesize version!).  (and still just 75% the runtime of the
	plain 8MB version with ~22,000 entries)

	also in testing these changes using '-d' naively I discovered a
	very serious bug in the error reporting in the install() function.
	(buf was used as both target and source in an snprintf()!!!)

	cachesize really should also be tunable at runtime (thus the
	commented out option code in the patch below, but it should
	override); and nelem should probably be taken from a quick count
	of newlines in the file; and I think bsize really does make the
	most sense as being the same as the the filesystem bsize, but I
	haven't examined the hash(3) code to confirm.....  The ffactor
	setting could probably be much better tuned too....

	(but then maybe this should all be tossed in favour of the OWL
	project's so-called "TCB" shadow-file library which should be
	much more secure overall and can be made even faster than this
	thing ever can for most of the common admin operations....)

>How-To-Repeat:

>Fix:

Index: pwd_mkdb.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/basesrc/usr.sbin/pwd_mkdb/pwd_mkdb.c,v
retrieving revision 1.24
diff -c -c -r1.24 pwd_mkdb.c
*** pwd_mkdb.c	31 Jan 2002 22:44:06 -0000	1.24
--- pwd_mkdb.c	15 Dec 2002 04:01:24 -0000
***************
*** 69,76 ****
  #include <pwd.h>
  #endif
  
! #define	MAX_CACHESIZE	8*1024*1024
! #define	MIN_CACHESIZE	2*1024*1024
  
  #define	PERM_INSECURE	(S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)
  #define	PERM_SECURE	(S_IRUSR | S_IWUSR)
--- 69,89 ----
  #include <pwd.h>
  #endif
  
! /*
!  * Set reasonable bounds for HASHINFO.cachesize
!  *
!  * On systems with a large user base, a small cache size can lead to
!  * prohibitively long database file rebuild times.
!  *
!  * As a rough guide, the memory usage of pwd_mkdb will be a little bit more
!  * than twice the final cachesize.
!  */
! #ifndef MAX_CACHESIZE
! # define MAX_CACHESIZE	(64*1024*1024)
! #endif
! #ifndef MIN_CACHESIZE
! # define MIN_CACHESIZE	(2*1024*1024)
! #endif
  
  #define	PERM_INSECURE	(S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)
  #define	PERM_SECURE	(S_IRUSR | S_IWUSR)
***************
*** 83,92 ****
  #endif
  
  HASHINFO openinfo = {
! 	4096,		/* bsize */
! 	32,		/* ffactor */
! 	256,		/* nelem */
! 	0,		/* cachesize */
  	NULL,		/* hash() */
  	0		/* lorder */
  };
--- 96,105 ----
  #endif
  
  HASHINFO openinfo = {
! 	8192,		/* bsize: table bucket size (page size or disk bsize?) */
! 	100,		/* ffactor: keys per bucket */
! 	1024,		/* nelem: estimated final table size (adj below) */
! 	0,		/* cachesize: guide for max memory cache (adj below) */
  	NULL,		/* hash() */
  	0		/* lorder */
  };
***************
*** 148,153 ****
--- 161,172 ----
  		case 'L':			/* little-endian output */
  			lorder = LITTLE_ENDIAN;
  			break;
+ #if 0
+ 		/* XXX It's '-s' in FreeBSD and OpenBSD uses '-c' for "check/verify only" */
+ 		case 'c':			/* from BSDI BSD/386 */
+ 			cachesize = atoi(optarg) * 1024;
+ 			break;
+ #endif
  		case 'd':			/* set prefix */
  			strncpy(prefix, optarg, sizeof(prefix));
  			prefix[sizeof(prefix)-1] = '\0';
***************
*** 209,220 ****
  		error(pname);
  
  	/* Tweak openinfo values for large passwd files. */
! 	cachesize = st.st_size * 20;
  	if (cachesize > MAX_CACHESIZE)
  		cachesize = MAX_CACHESIZE;
  	else if (cachesize < MIN_CACHESIZE)
  		cachesize = MIN_CACHESIZE;
  	openinfo.cachesize = cachesize;
  
  	/* Open the temporary insecure password database. */
  	if (!secureonly) {
--- 228,240 ----
  		error(pname);
  
  	/* Tweak openinfo values for large passwd files. */
! 	cachesize = st.st_size * 20;		/* 20?  what's 20? */
  	if (cachesize > MAX_CACHESIZE)
  		cachesize = MAX_CACHESIZE;
  	else if (cachesize < MIN_CACHESIZE)
  		cachesize = MIN_CACHESIZE;
  	openinfo.cachesize = cachesize;
+ 	openinfo.nelem = st.st_size / 80;	/* 80 chars per line!  ;-) */
  
  	/* Open the temporary insecure password database. */
  	if (!secureonly) {
***************
*** 487,496 ****
  
  	snprintf(buf, sizeof(buf), "%s%s", prefix, to);
  	if (rename(from, buf)) {
  		sverrno = errno;
! 		(void)snprintf(buf, sizeof(buf), "%s to %s", from, buf);
  		errno = sverrno;
! 		error(buf);
  	}
  }
  
--- 507,518 ----
  
  	snprintf(buf, sizeof(buf), "%s%s", prefix, to);
  	if (rename(from, buf)) {
+ 		char ebuf[BUFSIZ];
+ 
  		sverrno = errno;
! 		(void)snprintf(ebuf, sizeof(ebuf), "rename %s to %s failed", from, buf);
  		errno = sverrno;
! 		error(ebuf);
  	}
  }
  
>Release-Note:
>Audit-Trail:
>Unformatted: