Subject: Re: "too many files" in an FFS dir?
To: None <tech-kern@netbsd.org>
From: Paul A Vixie <vixie@mfnx.net>
List: tech-kern
Date: 12/19/2000 18:48:22
> 	I'd be happy to run it on a couple of NetBSD boxes here.

Since it's short I'm including it inline.  Note that it's currently set up
to compare deep vs. flat storage schemas, rather than to be as fast as
possible.

# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#	README
#	Makefile
#	tester.c
#	load-flat.sh
#	load-deep.sh
#	load-pgsql.sh
#	store-flat.sh
#	store-deep.sh
#	store-pgsql.sh
#
echo x - README
sed 's/^X//' >README << 'END-of-README'
X$Id:$
X
XThis is ipstore, a test jig designed to measure the performance of various
Xstorage schemas for ticket systems or other applications where arbitrary-sized
Xobjects are indexed by IP address or other short, structured keys.  In this
Xcollection of tools and scripts, the key ("file name") is assumed to be an
XIP address.  There are three schemas represented:
X
X	{store,load}-flat.sh
X
X		puts text in files named /var/tmp/store/192.5.5.241.txt
X
X		these scripts actually execute more commands than they
X		need, to remove variables from the comparison against
X		{store,load}-deep.sh (see below).
X
X	{store,load}-deep.sh
X
X		puts text in files named /var/tmp/store/192/5/5/241.txt
X
X		this depth attempts to avoid "long directory syndrome"
X		which can cause O(n^2) performance on create, search,
X		and delete operations.
X
X	{store,load}-pgsql.sh
X
X		puts text in postgres tables using the (unreleased) "pgcat"
X		utility.  there's a key column of type CIDR and a value
X		column of type TEXT.  requires pgsql 7.1 or later (TOAST).
X
X		runs a lot faster if the key column is a PRIMARY KEY and
X		may run a lot faster after VACUUM ANALYZE (still checking).
X
XThe test jig itself is called, appropriately, "tester.c".  It depends on
Xlibbind, specifically to get eventlib's timer support.  Just install bind8
Xin the default place and you'll be alright.
X
XYou will need a large file full of unique IP addresses, preferably real
Xones to show the usual pseudofractal distribution and resulting key
Xperformance.  You should use at least 50,000 IP addresses to make sure
Xthat none of the operating system's usual "fast path" optimizations will
Xhelp you.  In the instructions below it's assumed that the addresses are
Xstored, one per line, in a file called "addr-list".
X
XUsage instructions are predictably wonky:
X
X	make	does what you expect.  also "make clean" when you're ready.
X
X	mkdir /var/tmp/store
X	./tester addr-list /etc/ttys ./store-flat > store-flat-results
X	sort addr-list > sorted-list
X	./tester addr-list /dev/null ./load-flat > load-flat-results
X	rm -rf /var/tmp/store
X
X	mkdir /var/tmp/store
X	./tester addr-list /etc/ttys ./store-deep > store-deep-results
X	sort addr-list > sorted-list
X	./tester addr-list /dev/null ./load-deep > load-deep-results
X	rm -rf /var/tmp/store
X
X	gnuplot
X	> set term png small color
X	> set output "store-results.png"
X	> plot "store-flat-results", "store-deep-results"
X	> set output "load-results.png"
X	> plot "load-flat-results", "load-deep-results"
X	> quit
X
X(/etc/ttys is an example of a small file you'd like 50,000 copies made of.)
X
Xgnuplot is pretty cool.  if you skip "set term" and "set output" then it
Xwill make X windows with its graph results.
X
XFor grins, try it on an MFS (Memory File System) and look at the differences.
XAnd especially, try it with and without "softdep", and with and without "SCSI
Xdisconnects".
END-of-README
echo x - Makefile
sed 's/^X//' >Makefile << 'END-of-Makefile'
X## Copyright (c) 2000 by Mail Abuse Prevention System LLC
X##
X## Permission to use, copy, modify, and distribute this software for any
X## purpose with or without fee is hereby granted, provided that the above
X## copyright notice and this permission notice appear in all copies.
X##
X## THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
X## ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
X## OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
X## CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
X## DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
X## PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
X## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
X## SOFTWARE.
X
X# $Id:$
X
X# Shell-script front ends; each one is installed from the matching <name>.sh.
XSH= load-flat load-deep load-pgsql store-flat store-deep store-pgsql
X# Everything "make all" builds and "make clean" removes.
XALL= tester $(SH)
X
X# Files bundled into the shell archive by "make kit".
XKIT= README Makefile tester.c load-flat.sh load-deep.sh load-pgsql.sh \
X	store-flat.sh store-deep.sh store-pgsql.sh
X
X# Toolchain settings; the bind8 headers and library are looked up under
X# /usr/local/bind.
XCC= gcc -Wall
XCFLAGS= -I/usr/local/bind/include -O
XLDFLAGS= -L/usr/local/bind/lib
XLIBS= -lbind
X
Xall: $(ALL)
X
X.SUFFIXES: .sh
X
X# Regenerate the shell archive; FRC forces this to run every time.
Xkit: FRC
X	shar $(KIT) >kit
X
X# Remove build products and editor leftovers.  The '#' characters are quoted
X# so the shell globs for autosave files instead of starting a comment.
Xclean: FRC
X	rm -f $(ALL) kit
X	rm -f *.o *~ *.BAK *.CKP '#'*'#'
X
X# Link with the same compiler driver that compiled tester.o.
Xtester: tester.o Makefile
X	$(CC) $(LDFLAGS) -o tester tester.o $(LIBS)
X
X# No recipe here: the object is produced by make's built-in .c.o rule.
Xtester.o: tester.c Makefile
X
X# Single-suffix rule: "foo" is built from "foo.sh", so editing a script
X# causes its installed copy to be refreshed on the next make.
X.sh:
X	rm -f $@ && cp $< $@ && chmod +x $@
X
X# Deliberately empty, never-existing target used to force rebuilds.
XFRC:
END-of-Makefile
echo x - tester.c
sed 's/^X//' >tester.c << 'END-of-tester.c'
X/*
X * Copyright (c) 2000 by Mail Abuse Prevention System LLC
X *
X * Permission to use, copy, modify, and distribute this software for any
X * purpose with or without fee is hereby granted, provided that the above
X * copyright notice and this permission notice appear in all copies.
X *
X * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
X * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
X * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
X * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
X * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
X * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
X * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
X * SOFTWARE.
X */
X
X#ifndef LINT
Xstatic const char rcsid[] = "$Id:$";
X#endif
X
X#include <sys/types.h>
X#include <sys/file.h>
X#include <sys/wait.h>
X
X#include <assert.h>
X#include <stdio.h>
X#include <stdlib.h>
X#include <string.h>
X#include <unistd.h>
X
X#include <isc/eventlib.h>
X
Xstatic const char *progname = "amnesia";
X
Xstatic int run(const char *, const char *, int, int);
X
Xstatic void
Xusage(const char *msg) {
X	fprintf(stderr, "%s: usage error (%s)\n", progname, msg);
X	fprintf(stderr, "usage: %s addrfile textfile programname\n", progname);
X	exit(1);
X}
X
Xint
Xmain(int argc, char *argv[]) {
X	char *addrfilename, *textfilename, *programname;
X	FILE *addrfile;
X	int textfile, devnull, lineno;
X	struct timespec before, delta;
X	char line[100];
X
X	if ((progname = strrchr(argv[0], '/')) != NULL)
X		progname++;
X	else
X		progname = argv[0];
X	if (argc != 4)
X		usage("wrong # of args");
X	addrfilename = argv[1];
X	textfilename = argv[2];
X	programname = argv[3];
X
X	if ((addrfile = fopen(addrfilename, "r")) == NULL) {
X		perror(addrfilename);
X		exit(1);
X	}
X	if ((textfile = open(textfilename, O_RDONLY)) == -1) {
X		perror(textfilename);
X		exit(1);
X	}
X	if ((devnull = open("/dev/null", O_RDWR)) == -1) {
X		perror("/dev/null");
X		exit(1);
X	}
X
X	lineno = 0;
X	while (fgets(line, sizeof line, addrfile) != NULL) {
X		char *nl = strrchr(line, '\n');
X
X		lineno++;
X		if (nl != NULL)
X			*nl = '\0';
X		before = evNowTime();
X		if (run(programname, line, textfile, devnull) != 0) {
X			perror(programname);
X			exit(1);
X		}
X		delta = evSubTime(evNowTime(), before);
X		printf("%d\t%f\n", lineno,
X		       (double)(delta.tv_sec + (delta.tv_nsec / 1000000000.0)));
X	}
X	fclose(addrfile);
X	close(textfile);
X	return (0);
X}
X
Xstatic int
Xrun(const char *programname, const char *arg, int stdinfile, int stdoutfile) {
X	int status;
X
X	switch (vfork()) {
X	case -1:
X		return (-1);
X	case 0:
X		lseek(stdinfile, 0, SEEK_SET);
X		dup2(stdinfile, STDIN_FILENO);
X		dup2(stdoutfile, STDOUT_FILENO);
X		execlp(programname, programname, arg, NULL);
X		perror(programname);
X		_exit(1);
X	default:
X		if (wait(&status) < 0)
X			return (-1);
X		return (WEXITSTATUS(status));
X	}
X	abort();
X}
END-of-tester.c
echo x - load-flat.sh
sed 's/^X//' >load-flat.sh << 'END-of-load-flat.sh'
X#!/bin/sh
X
X# $Id:$
X
X# Here, we run sed and mkdir just to remove variables from the
X# comparison between our performance and store-deep's performance.
X
Xdir=${TMPDIR:-/var/tmp}/store
Xpath=$dir/`echo $1 | sed 's/\./\//g'`.txt
X
Xexec cat < $dir/$1.txt
END-of-load-flat.sh
echo x - load-deep.sh
sed 's/^X//' >load-deep.sh << 'END-of-load-deep.sh'
X#!/bin/sh
X
X# $Id:$
X
X# Write the text stored for the address $1 in the deep store to stdout.
X# Each dot in the address becomes a directory separator, so a.b.c.d is
X# read from $dir/a/b/c/d.txt.
X
Xdir=${TMPDIR:-/var/tmp}/store
Xpath=$dir/`echo "$1" | sed 's/\./\//g'`.txt
X
X# Quoted so a TMPDIR containing whitespace still names a single file.
Xexec cat < "$path"
X
END-of-load-deep.sh
echo x - load-pgsql.sh
sed 's/^X//' >load-pgsql.sh << 'END-of-load-pgsql.sh'
X#!/bin/sh
X
X# $Id:$
X
Xexec ./pgcat get rss addr $1 file
END-of-load-pgsql.sh
echo x - store-flat.sh
sed 's/^X//' >store-flat.sh << 'END-of-store-flat.sh'
X#!/bin/sh
X
X# $Id:$
X
X# Store standard input as the text for the address $1 in the flat store.
X#
X# Here, we run sed and mkdir just to remove variables from the
X# comparison between our performance and store-deep's performance.
X
Xdir=${TMPDIR:-/var/tmp}/store
Xpath=$dir/`echo "$1" | sed 's/\./\//g'`.txt
Xparent=`dirname "$path"`
X
Xmkdir -p "$parent" >/dev/null 2>&1
X# The nested $path is deliberately not the destination here; the text goes
X# into a single file directly under $dir.
Xexec cat > "$dir/$1.txt"
END-of-store-flat.sh
echo x - store-deep.sh
sed 's/^X//' >store-deep.sh << 'END-of-store-deep.sh'
X#!/bin/sh
X
X# $Id:$
X
X# Store standard input as the text for the address $1 in the deep store.
X# Each dot in the address becomes a directory separator, so a.b.c.d is
X# written to $dir/a/b/c/d.txt.
X
Xdir=${TMPDIR:-/var/tmp}/store
Xpath=$dir/`echo "$1" | sed 's/\./\//g'`.txt
Xparent=`dirname "$path"`
X
X# Create the intermediate directories; errors are discarded because the
X# directories usually exist already.
Xmkdir -p "$parent" >/dev/null 2>&1
Xexec cat > "$path"
END-of-store-deep.sh
echo x - store-pgsql.sh
sed 's/^X//' >store-pgsql.sh << 'END-of-store-pgsql.sh'
X#!/bin/sh
X
X# $Id:$
X
Xexec ./pgcat put rss addr $1 file
END-of-store-pgsql.sh
exit