Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/ad-namecache]: src/sys vfs_lookup:



details:   https://anonhg.NetBSD.org/src/rev/65d3ef14b1bb
branches:  ad-namecache
changeset: 467228:65d3ef14b1bb
user:      ad <ad%NetBSD.org@localhost>
date:      Fri Jan 17 22:26:25 2020 +0000

description:
vfs_lookup:

- Do the easy component name lookups directly in the namecache without
  taking vnode locks nor vnode references (between the start and the leaf /
  parent), which seems to largely solve the lock contention problem with
  namei().  It needs support from the file system, which has to tell the
  name cache about directory permissions (only ffs and tmpfs tried so far),
  and I'm not sure how or if it can work with layered file systems yet.
  Work in progress.

vfs_cache:

- Make the rbtree operations more efficient: inline the lookup, and key on a
  64-bit hash value (32 bits plus 16 bits length) rather than names.

- Take namecache stuff out of vnode_impl, and take the rwlocks, and put them
  all together in an nchnode struct which is mapped 1:1 with vnodes.  Saves
  memory and nicer cache profile.

- Add a routine to help vfs_lookup do its easy component name lookups.

- Report some more stats.

- Tidy up the file a bit.

diffstat:

 sys/fs/tmpfs/tmpfs_subr.c |     8 +-
 sys/kern/vfs_cache.c      |  1196 +++++++++++++++++++++++++-------------------
 sys/kern/vfs_lookup.c     |   374 ++++++++++----
 sys/sys/namei.src         |    29 +-
 sys/sys/vnode_impl.h      |    12 +-
 sys/ufs/ffs/ffs_vfsops.c  |     6 +-
 sys/ufs/ufs/ufs_vnops.c   |     7 +-
 7 files changed, 989 insertions(+), 643 deletions(-)

diffs (truncated from 2344 to 300 lines):

diff -r 43f83531ea04 -r 65d3ef14b1bb sys/fs/tmpfs/tmpfs_subr.c
--- a/sys/fs/tmpfs/tmpfs_subr.c Fri Jan 17 21:55:13 2020 +0000
+++ b/sys/fs/tmpfs/tmpfs_subr.c Fri Jan 17 22:26:25 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: tmpfs_subr.c,v 1.105 2019/09/18 17:59:15 christos Exp $        */
+/*     $NetBSD: tmpfs_subr.c,v 1.105.2.1 2020/01/17 22:26:25 ad Exp $  */
 
 /*
  * Copyright (c) 2005-2013 The NetBSD Foundation, Inc.
@@ -73,7 +73,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.105 2019/09/18 17:59:15 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.105.2.1 2020/01/17 22:26:25 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/cprng.h>
@@ -147,6 +147,8 @@
        vp->v_data = node;
        node->tn_vnode = vp;
        uvm_vnp_setsize(vp, node->tn_size);
+       KASSERT(node->tn_mode != VNOVAL);
+       cache_set_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
 }
 
 /*
@@ -1037,6 +1039,7 @@
        node->tn_mode = (mode & ALLPERMS);
        tmpfs_update(vp, TMPFS_UPDATE_CTIME);
        VN_KNOTE(vp, NOTE_ATTRIB);
+       cache_update_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
        return 0;
 }
 
@@ -1081,6 +1084,7 @@
        node->tn_gid = gid;
        tmpfs_update(vp, TMPFS_UPDATE_CTIME);
        VN_KNOTE(vp, NOTE_ATTRIB);
+       cache_update_id(vp, node->tn_mode, node->tn_uid, node->tn_gid);
        return 0;
 }
 
diff -r 43f83531ea04 -r 65d3ef14b1bb sys/kern/vfs_cache.c
--- a/sys/kern/vfs_cache.c      Fri Jan 17 21:55:13 2020 +0000
+++ b/sys/kern/vfs_cache.c      Fri Jan 17 22:26:25 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: vfs_cache.c,v 1.126.2.5 2020/01/14 11:07:40 ad Exp $   */
+/*     $NetBSD: vfs_cache.c,v 1.126.2.6 2020/01/17 22:26:25 ad Exp $   */
 
 /*-
  * Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc.
@@ -61,7 +61,7 @@
  */
 
 /*
- * Name caching works as follows:
+ * Name caching:
  *
  *     Names found by directory scans are retained in a cache for future
  *     reference.  It is managed pseudo-LRU, so frequently used names will
@@ -78,17 +78,14 @@
  * Data structures:
  *
  *     The original BSD implementation used a global hash table, which
- *     works very well on a uniprocessor system but causes performance
- *     difficulties on a multiprocessor system.  The global hash table is
- *     also difficult to size dynamically, and can become very large.  To
- *     try and address these problems, the focus of interest in this
- *     implementation is the directory itself.  A per-directory structure
- *     is used to look up names.
- *
- *     XXX Currently this structure is an rbtree, but rbtrees touch many
- *     cache lines during a lookup and so perform badly.  The intent is to
- *     utimately make use of some other data structure, perhaps a Robin
- *     Hood hash.  Insert blurb here when that happens.
+ *     works very well but can cause difficulties for the CPU cache on
+ *     modern CPUs, and concurrency headaches for the kernel hacker on
+ *     multiprocessor systems.  The global hash table is also difficult to
+ *     size dynamically.  To try and address these concerns, the focus of
+ *     interest in this implementation is the directory itself: a
+ *     per-directory red-black tree is used to look up names.  Other than
+ *     the choice of data structure, it works largely the same way as the
+ *     BSD implementation.
  *
  * Replacement:
  *
@@ -109,46 +106,49 @@
  * Lock order:
  *
  *     1) nc_dvp->vi_ncdlock
- *     2) cache_list_lock
- *     3) vp->v_interlock
+ *     2) nc_dvp->vi_ncvlock
+ *     3) cache_lru_lock, vp->v_interlock
  *
  * Ugly ASCII diagram:
  *
  *     XXX replace tabs with spaces, make less ugly
  *
  *          ...
- *          |
- *     -----o-----
- *     |  VDIR   |
- *     |  vnode  |
- *     -----------
+ *           ^
+ *           |
+ *      -----o-----
+ *      |  VDIR   |
+ *      | nchnode |
+ *      -----------
  *           ^
  *           |- nd_tree
- *          |                                           
- *     +----+----+               -----------               -----------
- *     |  VDIR   |               |  VCHR   |               |  VREG   |
- *     |  vnode  o-----+         |  vnode  o-----+         |  vnode  o------+
- *     +---------+     |         -----------     |         -----------      |
- *          ^          |              ^          |              ^           |
- *          |- nc_vp   |- vi_nclist   |- nc_vp   |- vi_nclist   |- nc_vp    |
- *          |          |              |          |              |           |
- *     -----o-----     |         -----o-----     |         -----o-----      |
- *  +---onamecache|<----+     +---onamecache|<----+    +---onamecache|<-----+
- *  |  -----------           |   -----------           |   -----------
- *  |        ^               |        ^                |        ^
+ *           |                                                           
+ *      -----o-----               -----------               -----------
+ *      |  VDIR   |               |  VCHR   |               |  VREG   |
+ *      | nchnode o-----+         | nchnode o-----+         | nchnode o------+
+ *      -----------     |         -----------     |         -----------      |
+ *           ^          |              ^          |              ^           |
+ *           |- nc_nn   |- nn_list     |- nc_nn   |- nn_list     |- nc_nn    |
+ *           |          |              |          |              |           |
+ *      -----o-----     |         -----o-----     |         -----o-----      |
+ *  +---onamecache|<----+     +---onamecache|<----+     +---onamecache|<-----+
+ *  |   -----------           |   -----------           |   -----------
+ *  |        ^                |        ^                |        ^
  *  |        |                |        |                |        |
- *  |        |  +----------------------+               |        |
- *  |-nc_dvp | +-------------------------------------------------+
- *  |       |/- nd_tree      |                         |
- *  |       |                |- nc_dvp                 |
- *  |  -----o-----           |                         |
- *  +-->|  VDIR   |<----------+                                |
- *     |  vnode  |<------------------------------------+
- *     -----------
+ *  |        |  +----------------------+                |        |
+ *  |-nc_dnn | +-------------------------------------------------+
+ *  |        |/- nd_tree      |                         |
+ *  |        |                |- nc_dnn                 |- nc_dnn
+ *  |   -----o-----           |                         |
+ *  +-->|  VDIR   |<----------+                         |
+ *      | nchnode |<------------------------------------+
+ *      -----------
+ *
+ *      START HERE
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.126.2.5 2020/01/14 11:07:40 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.126.2.6 2020/01/17 22:26:25 ad Exp $");
 
 #define __NAMECACHE_PRIVATE
 #ifdef _KERNEL_OPT
@@ -156,14 +156,15 @@
 #include "opt_dtrace.h"
 #endif
 
-#include <sys/param.h>
 #include <sys/cpu.h>
 #include <sys/errno.h>
 #include <sys/evcnt.h>
+#include <sys/hash.h>
 #include <sys/kernel.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
+#include <sys/param.h>
 #include <sys/pool.h>
 #include <sys/sdt.h>
 #include <sys/sysctl.h>
@@ -171,13 +172,48 @@
 #include <sys/time.h>
 #include <sys/vnode_impl.h>
 
+#include <miscfs/genfs/genfs.h>
+
+/*
+ * Per-vnode state for the namecache.  This is allocated apart from struct
+ * vnode to make the best use of memory, and best use of CPU cache.  Field
+ * markings and corresponding locks:
+ *
+ *     -       stable throught the lifetime of the vnode
+ *     n       protected by nn_lock
+ *     l       protected by nn_listlock
+ */
+struct nchnode {
+       /* First cache line: frequently used stuff. */
+       rb_tree_t       nn_tree;        /* n  namecache tree */
+       TAILQ_HEAD(,namecache) nn_list; /* l  namecaches (parent) */
+       mode_t          nn_mode;        /* n  cached mode or VNOVAL */
+       uid_t           nn_uid;         /* n  cached UID or VNOVAL */
+       gid_t           nn_gid;         /* n  cached GID or VNOVAL */
+       uint32_t        nn_spare;       /* -  spare (padding) */
+
+       /* Second cache line: locks and infrequenly used stuff. */
+       krwlock_t       nn_lock         /* -  lock on node */
+           __aligned(COHERENCY_UNIT);
+       krwlock_t       nn_listlock;    /* -  lock on nn_list */
+       struct vnode    *nn_vp;         /* -  backpointer to vnode */
+};
+
+static void    cache_activate(struct namecache *);
+static int     cache_compare_key(void *, const void *, const void *);
+static int     cache_compare_nodes(void *, const void *, const void *);
+static void    cache_deactivate(void);
+static void    cache_reclaim(void);
+static int     cache_stat_sysctl(SYSCTLFN_ARGS);
+
 /* Per-CPU counters. */
 struct nchstats_percpu _NAMEI_CACHE_STATS(uintptr_t);
 
 /* Global pool cache. */
 static pool_cache_t cache_pool __read_mostly;
+static pool_cache_t cache_node_pool __read_mostly;
 
-/* LRU replacement state. */
+/* LRU replacement. */
 enum cache_lru_id {
        LRU_ACTIVE,
        LRU_INACTIVE,
@@ -194,7 +230,6 @@
 /* Cache effectiveness statistics.  This holds total from per-cpu stats */
 struct nchstats        nchstats __cacheline_aligned;
 
-/* Macro to count an event. */
 #define        COUNT(f)        do { \
        kpreempt_disable(); \
        ((struct nchstats_percpu *)curcpu()->ci_data.cpu_nch)->f++; \
@@ -206,12 +241,16 @@
 static const int cache_lru_maxscan = 128;      /* max # to scan/reclaim */
 static int doingcache = 1;                     /* 1 => enable the cache */
 
-static void    cache_reclaim(void);
-static void    cache_remove(struct namecache *, const bool);
+/* sysctl */
+static struct  sysctllog *cache_sysctllog;
 
-/* sysctl */
-static struct  sysctllog *sysctllog;
-static void    sysctl_cache_stat_setup(void);
+/* Read-black tree */
+static rb_tree_ops_t cache_rbtree_ops __read_mostly = {
+       .rbto_compare_nodes = cache_compare_nodes,
+       .rbto_compare_key = cache_compare_key,
+       .rbto_node_offset = offsetof(struct namecache, nc_tree),
+       .rbto_context = NULL
+};
 
 /* dtrace hooks */
 SDT_PROVIDER_DEFINE(vfs);
@@ -243,13 +282,16 @@
 static int
 cache_compare_nodes(void *context, const void *n1, const void *n2)
 {
-       const struct namecache *ncp1 = n1;
-       const struct namecache *ncp2 = n2;
+       const struct namecache *nc1 = n1;
+       const struct namecache *nc2 = n2;
 
-       if (ncp1->nc_nlen != ncp2->nc_nlen) {
-               return (int)ncp1->nc_nlen - (int)ncp2->nc_nlen;
+       if (nc1->nc_key < nc2->nc_key) {
+               return -1;
        }
-       return memcmp(ncp1->nc_name, ncp2->nc_name, ncp1->nc_nlen);
+       if (nc1->nc_key > nc2->nc_key) {
+               return 1;
+       }
+       return 0;
 }
 
 /*
@@ -258,156 +300,135 @@
 static int
 cache_compare_key(void *context, const void *n, const void *k)
 {
-       const struct namecache *ncp = n;
-       const struct iovec *iov = k;
+       const struct namecache *nc = n;
+       const int64_t key = *(const int64_t *)k;
 
-       if (ncp->nc_nlen != iov->iov_len) {
-               return (int)ncp->nc_nlen - (int)iov->iov_len;
+       if (nc->nc_key < key) {
+               return -1;
        }
-       return memcmp(ncp->nc_name, iov->iov_base, ncp->nc_nlen);
+       if (nc->nc_key > key) {



Home | Main Index | Thread Index | Old Index