Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/ufs Bring in Ian Dowse's Dirhash from FreeBSD. Hash tabl...



details:   https://anonhg.NetBSD.org/src/rev/ac8b13f3c26e
branches:  trunk
changeset: 573249:ac8b13f3c26e
user:      rumble <rumble%NetBSD.org@localhost>
date:      Sun Jan 23 19:37:05 2005 +0000

description:
Bring in Ian Dowse's Dirhash from FreeBSD. Hash tables of
directories are created on the fly and used to increase
performance by circumventing ufs_lookup's linear search.

Dirhash is enabled by the UFS_DIRHASH option, but not
by default.

diffstat:

 sys/ufs/files.ufs         |     3 +-
 sys/ufs/ufs/Makefile      |     5 +-
 sys/ufs/ufs/dirhash.h     |   124 ++++
 sys/ufs/ufs/inode.h       |     4 +-
 sys/ufs/ufs/ufs_dirhash.c |  1121 +++++++++++++++++++++++++++++++++++++++++++++
 sys/ufs/ufs/ufs_inode.c   |    11 +-
 sys/ufs/ufs/ufs_lookup.c  |   100 +++-
 sys/ufs/ufs/ufs_vfsops.c  |    13 +-
 sys/ufs/ufs/ufs_vnops.c   |    11 +-
 9 files changed, 1380 insertions(+), 12 deletions(-)

diffs (truncated from 1611 to 300 lines):

diff -r a12f59e0a398 -r ac8b13f3c26e sys/ufs/files.ufs
--- a/sys/ufs/files.ufs Sun Jan 23 19:24:31 2005 +0000
+++ b/sys/ufs/files.ufs Sun Jan 23 19:37:05 2005 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: files.ufs,v 1.3 2004/05/25 14:54:58 hannken Exp $
+#      $NetBSD: files.ufs,v 1.4 2005/01/23 19:37:05 rumble Exp $
 
 deffs                                  FFS
 deffs                                  EXT2FS
@@ -50,6 +50,7 @@
 file   ufs/mfs/mfs_vnops.c             mfs
 
 file   ufs/ufs/ufs_bmap.c              ffs | lfs | mfs | ext2fs
+file   ufs/ufs/ufs_dirhash.c           ffs | lfs | mfs | ext2fs
 file   ufs/ufs/ufs_ihash.c             ffs | lfs | mfs | ext2fs
 file   ufs/ufs/ufs_inode.c             ffs | lfs | mfs
 file   ufs/ufs/ufs_lookup.c            ffs | lfs | mfs | ext2fs
diff -r a12f59e0a398 -r ac8b13f3c26e sys/ufs/ufs/Makefile
--- a/sys/ufs/ufs/Makefile      Sun Jan 23 19:24:31 2005 +0000
+++ b/sys/ufs/ufs/Makefile      Sun Jan 23 19:37:05 2005 +0000
@@ -1,7 +1,8 @@
-#      $NetBSD: Makefile,v 1.1 1998/06/12 23:23:12 cgd Exp $
+#      $NetBSD: Makefile,v 1.2 2005/01/23 19:37:05 rumble Exp $
 
 INCSDIR= /usr/include/ufs/ufs
 
-INCS=  dinode.h dir.h inode.h quota.h ufs_bswap.h ufs_extern.h ufsmount.h
+INCS=  dinode.h dir.h dirhash.h inode.h quota.h ufs_bswap.h ufs_extern.h \
+       ufsmount.h
 
 .include <bsd.kinc.mk>
diff -r a12f59e0a398 -r ac8b13f3c26e sys/ufs/ufs/dirhash.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/ufs/ufs/dirhash.h     Sun Jan 23 19:37:05 2005 +0000
@@ -0,0 +1,124 @@
+/*     $NetBSD: dirhash.h,v 1.1 2005/01/23 19:37:05 rumble Exp $       */
+
+/*
+ * Copyright (c) 2001 Ian Dowse.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/ufs/ufs/dirhash.h,v 1.2.2.2 2004/12/08 11:54:13 dwmalone Exp $
+ */
+
+#ifndef _UFS_UFS_DIRHASH_H_
+#define _UFS_UFS_DIRHASH_H_
+
+/*
+ * For fast operations on large directories, we maintain a hash
+ * that maps the file name to the offset of the directory entry within
+ * the directory file.
+ *
+ * The hashing uses a dumb spillover to the next free slot on
+ * collisions, so we must keep the utilisation low to avoid
+ * long linear searches. Deleted entries that are not the last
+ * in a chain must be marked DIRHASH_DEL.
+ *
+ * We also maintain information about free space in each block
+ * to speed up creations.
+ */
+#define DIRHASH_EMPTY  (-1)    /* entry unused */
+#define DIRHASH_DEL    (-2)    /* deleted entry; may be part of chain */
+
+#define DIRALIGN       4
+#define DH_NFSTATS     (DIRECTSIZ(MAXNAMLEN + 1) / DIRALIGN)
+                                /* max DIRALIGN words in a directory entry */
+
+/*
+ * Dirhash uses a score mechanism to achieve a hybrid between a
+ * least-recently-used and a least-often-used algorithm for entry
+ * recycling. The score is incremented when a directory is used, and
+ * decremented when the directory is a candidate for recycling. When
+ * the score reaches zero, the hash is recycled. Hashes are linked
+ * together on a TAILQ list, and hashes with higher scores filter
+ * towards the tail (most recently used) end of the list.
+ *
+ * New hash entries are given an inital score of DH_SCOREINIT and are
+ * placed at the most-recently-used end of the list. This helps a lot
+ * in the worst-case case scenario where every directory access is
+ * to a directory that is not hashed (i.e. the working set of hash
+ * candidates is much larger than the configured memry limit). In this
+ * case it limits the number of hash builds to 1/DH_SCOREINIT of the
+ * number of accesses.
+ */ 
+#define DH_SCOREINIT   8       /* initial dh_score when dirhash built */
+#define DH_SCOREMAX    64      /* max dh_score value */
+
+/*
+ * The main hash table has 2 levels. It is an array of pointers to
+ * blocks of DH_NBLKOFF offsets.
+ */
+#define DH_BLKOFFSHIFT 8
+#define DH_NBLKOFF     (1 << DH_BLKOFFSHIFT)
+#define DH_BLKOFFMASK  (DH_NBLKOFF - 1)
+
+#define DH_ENTRY(dh, slot) \
+    ((dh)->dh_hash[(slot) >> DH_BLKOFFSHIFT][(slot) & DH_BLKOFFMASK])
+
+struct dirhash {
+       doff_t  **dh_hash;      /* the hash array (2-level) */
+       int     dh_narrays;     /* number of entries in dh_hash */
+       int     dh_hlen;        /* total slots in the 2-level hash array */
+       int     dh_hused;       /* entries in use */
+
+       u_int8_t *dh_blkfree;   /* free DIRALIGN words in each dir block */
+       int     dh_nblk;        /* size of dh_blkfree array */
+       int     dh_dirblks;     /* number of DIRBLKSIZ blocks in dir */
+       int     dh_firstfree[DH_NFSTATS + 1]; /* first blk with N words free */
+
+       int     dh_seqopt;      /* sequential access optimisation enabled */
+       doff_t  dh_seqoff;      /* sequential access optimisation offset */
+
+       int     dh_score;       /* access count for this dirhash */
+
+       int     dh_onlist;      /* true if on the ufsdirhash_list chain */
+
+       TAILQ_ENTRY(dirhash) dh_list;   /* chain of all dirhashes */
+};
+
+
+/*
+ * Dirhash functions.
+ */
+int    ufsdirhash_build(struct inode *);
+doff_t ufsdirhash_findfree(struct inode *, int, int *);
+doff_t ufsdirhash_enduseful(struct inode *);
+int    ufsdirhash_lookup(struct inode *, const char *, int, doff_t *,
+           struct buf **, doff_t *);
+void   ufsdirhash_newblk(struct inode *, doff_t);
+void   ufsdirhash_add(struct inode *, struct direct *, doff_t);
+void   ufsdirhash_remove(struct inode *, struct direct *, doff_t);
+void   ufsdirhash_move(struct inode *, struct direct *, doff_t, doff_t);
+void   ufsdirhash_dirtrunc(struct inode *, doff_t);
+void   ufsdirhash_free(struct inode *);
+void   ufsdirhash_checkblock(struct inode *, char *, doff_t);
+void   ufsdirhash_init(void);
+void   ufsdirhash_done(void);
+
+#endif /* !_UFS_UFS_DIRHASH_H_ */
diff -r a12f59e0a398 -r ac8b13f3c26e sys/ufs/ufs/inode.h
--- a/sys/ufs/ufs/inode.h       Sun Jan 23 19:24:31 2005 +0000
+++ b/sys/ufs/ufs/inode.h       Sun Jan 23 19:37:05 2005 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: inode.h,v 1.39 2004/08/14 14:32:04 mycroft Exp $       */
+/*     $NetBSD: inode.h,v 1.40 2005/01/23 19:37:05 rumble Exp $        */
 
 /*
  * Copyright (c) 1982, 1989, 1993
@@ -129,6 +129,8 @@
        u_int32_t i_uid;        /* File owner. */
        u_int32_t i_gid;        /* File group. */
 
+       struct dirhash *i_dirhash;      /* Hashing for large directories */
+
        /*
         * The on-disk dinode itself.
         */
diff -r a12f59e0a398 -r ac8b13f3c26e sys/ufs/ufs/ufs_dirhash.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/ufs/ufs/ufs_dirhash.c Sun Jan 23 19:37:05 2005 +0000
@@ -0,0 +1,1121 @@
+/*     $NetBSD: ufs_dirhash.c,v 1.1 2005/01/23 19:37:05 rumble Exp $   */
+
+/*
+ * Copyright (c) 2001, 2002 Ian Dowse.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/ufs/ufs/ufs_dirhash.c,v 1.3.2.8 2004/12/08 11:54:13 dwmalone Exp $
+ */
+
+/*
+ * This implements a hash-based lookup scheme for UFS directories.
+ */
+
+#ifdef UFS_DIRHASH
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/types.h>
+#include <sys/hash.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/pool.h>
+#include <sys/sysctl.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/dir.h>
+#include <ufs/ufs/dirhash.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_bswap.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#define WRAPINCR(val, limit)   (((val) + 1 == (limit)) ? 0 : ((val) + 1))
+#define WRAPDECR(val, limit)   (((val) == 0) ? ((limit) - 1) : ((val) - 1))
+#define OFSFMT(ip)             ((ip)->i_ump->um_maxsymlinklen <= 0)
+#define BLKFREE2IDX(n)         ((n) > DH_NFSTATS ? DH_NFSTATS : (n))
+
+static MALLOC_DEFINE(M_DIRHASH, "UFS dirhash", "UFS directory hash tables");
+
+static int ufs_dirhashminblks = 5;
+static int ufs_dirhashmaxmem = 2 * 1024 * 1024;
+static int ufs_dirhashmem; 
+static int ufs_dirhashcheck = 0;
+
+static int ufsdirhash_hash(struct dirhash *dh, const char *name, int namelen);
+static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff,
+          int dirblksiz);
+static void ufsdirhash_delslot(struct dirhash *dh, int slot);
+static int ufsdirhash_findslot(struct dirhash *dh, const char *name,
+          int namelen, doff_t offset);
+static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset,
+          int dirblksiz);
+static int ufsdirhash_recycle(int wanted);
+
+POOL_INIT(ufsdirhash_pool, DH_NBLKOFF * sizeof(daddr_t), 0, 0, 0, "ufsdirhash",
+    &pool_allocator_nointr);
+
+#define DIRHASHLIST_LOCK()             do { } while (0)
+#define DIRHASHLIST_UNLOCK()           do { } while (0)
+#define DIRHASH_LOCK(dh)               do { } while (0)
+#define DIRHASH_UNLOCK(dh)             do { } while (0)
+#define DIRHASH_BLKALLOC_WAITOK()      pool_get(&ufsdirhash_pool, PR_WAITOK)
+#define DIRHASH_BLKFREE(ptr)           pool_put(&ufsdirhash_pool, ptr)
+
+/* Dirhash list; recently-used entries are near the tail. */
+static TAILQ_HEAD(, dirhash) ufsdirhash_list;
+
+/*
+ * Attempt to build up a hash table for the directory contents in
+ * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
+ */
+int
+ufsdirhash_build(struct inode *ip)
+{
+       struct dirhash *dh;
+       struct buf *bp = NULL;
+       struct direct *ep;
+       struct vnode *vp;
+       doff_t bmask, pos;
+       int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;
+       const int needswap = UFS_MPNEEDSWAP(ip->i_ump); 
+       int dirblksiz = ip->i_ump->um_dirblksiz;
+
+       /* Check if we can/should use dirhash. */
+       if (ip->i_dirhash == NULL) {
+               if (ip->i_size < (ufs_dirhashminblks * dirblksiz) || OFSFMT(ip))
+                       return (-1);
+       } else {
+               /* Hash exists, but sysctls could have changed. */
+               if (ip->i_size < (ufs_dirhashminblks * dirblksiz) ||
+                   ufs_dirhashmem > ufs_dirhashmaxmem) {
+                       ufsdirhash_free(ip);
+                       return (-1);
+               }
+               /* Check if hash exists and is intact (note: unlocked read). */



Home | Main Index | Thread Index | Old Index