tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: NCHNAMLEN vnode cache limitation removal




> On Sep 11, 2019, at 2:23 AM, Christos Zoulas <christos%zoulas.com@localhost> wrote:
> 
> Comments?

This looks good, and I think it would be fine for it to go in now.  However, I think we should probably instrument how many duplicate names may end up in the name cache over the course of "normal" operation (insert "standard" workloads here).  It may be worth implementing a generic string cache to de-dup those names (which has the secondary advantage of making the name cache entry itself fixed-size again).

> 
> Thanks,
> 
> christos
> 
> Index: sys/namei.h
> ===================================================================
> RCS file: /cvsroot/src/sys/sys/namei.h,v
> retrieving revision 1.98
> diff -u -p -u -r1.98 namei.h
> --- sys/namei.h	3 Jun 2019 06:05:39 -0000	1.98
> +++ sys/namei.h	10 Sep 2019 23:20:48 -0000
> @@ -196,19 +196,9 @@ struct nameidata {
> #endif
> 
> /*
> + * Namecache entry.  
>  * This structure describes the elements in the cache of recent
> - * names looked up by namei. NCHNAMLEN is sized to make structure
> - * size a power of two to optimize allocations.  Minimum reasonable
> - * size is 15.
> - */
> -
> -#define	NCHNAMLEN	31	/* maximum name segment length we bother with */
> -
> -/*
> - * Namecache entry.  This structure is arranged so that frequently
> - * accessed and mostly read-only data is toward the front, with
> - * infrequently accessed data and the lock towards the rear.  The
> - * lock is then more likely to be in a separate cache line.
> + * names looked up by namei.
>  *
>  * Locking rules:
>  *
> @@ -225,14 +215,14 @@ struct namecache {
> 	struct	vnode *nc_dvp;		/* N vnode of parent of name */
> 	struct	vnode *nc_vp;		/* N vnode the name refers to */
> 	int	nc_flags;		/* - copy of componentname ISWHITEOUT */
> -	char	nc_nlen;		/* - length of name */
> -	char	nc_name[NCHNAMLEN];	/* - segment name */
> 	void	*nc_gcqueue;		/* N queue for garbage collection */
> -	TAILQ_ENTRY(namecache) nc_lru;	/* L psuedo-lru chain */
> +	TAILQ_ENTRY(namecache) nc_lru;	/* L pseudo-lru chain */
> 	LIST_ENTRY(namecache) nc_dvlist;/* L dvp's list of cache entries */
> 	LIST_ENTRY(namecache) nc_vlist; /* L vp's list of cache entries */
> 	kmutex_t nc_lock;		/*   lock on this entry */
> 	int	nc_hittime;		/* N last time scored a hit */
> +	u_short	nc_nlen;		/* - length of name */
> +	char	nc_name[0];		/* - segment name */
> };
> 
> #ifdef _KERNEL
> Index: sys/namei.src
> ===================================================================
> RCS file: /cvsroot/src/sys/sys/namei.src,v
> retrieving revision 1.42
> diff -u -p -u -r1.42 namei.src
> --- sys/namei.src	3 Jun 2019 06:04:21 -0000	1.42
> +++ sys/namei.src	10 Sep 2019 23:20:48 -0000
> @@ -188,19 +188,9 @@ NAMEIFL	PARAMASK	0x02ee300	/* mask of pa
> #endif
> 
> /*
> + * Namecache entry. 
>  * This structure describes the elements in the cache of recent
> - * names looked up by namei. NCHNAMLEN is sized to make structure
> - * size a power of two to optimize allocations.  Minimum reasonable
> - * size is 15.
> - */
> -
> -#define	NCHNAMLEN	31	/* maximum name segment length we bother with */
> -
> -/*
> - * Namecache entry.  This structure is arranged so that frequently
> - * accessed and mostly read-only data is toward the front, with
> - * infrequently accessed data and the lock towards the rear.  The
> - * lock is then more likely to be in a separate cache line.
> + * names looked up by namei.
>  *
>  * Locking rules:
>  *
> @@ -217,14 +207,14 @@ struct namecache {
> 	struct	vnode *nc_dvp;		/* N vnode of parent of name */
> 	struct	vnode *nc_vp;		/* N vnode the name refers to */
> 	int	nc_flags;		/* - copy of componentname ISWHITEOUT */
> -	char	nc_nlen;		/* - length of name */
> -	char	nc_name[NCHNAMLEN];	/* - segment name */
> 	void	*nc_gcqueue;		/* N queue for garbage collection */
> 	TAILQ_ENTRY(namecache) nc_lru;	/* L psuedo-lru chain */
> 	LIST_ENTRY(namecache) nc_dvlist;/* L dvp's list of cache entries */
> 	LIST_ENTRY(namecache) nc_vlist; /* L vp's list of cache entries */
> 	kmutex_t nc_lock;		/*   lock on this entry */
> 	int	nc_hittime;		/* N last time scored a hit */
> +	char	nc_nlen;		/* - length of name */
> +	char	nc_name[0];		/* - segment name */
> };
> 
> #ifdef _KERNEL
> Index: kern/vfs_cache.c
> ===================================================================
> RCS file: /cvsroot/src/sys/kern/vfs_cache.c,v
> retrieving revision 1.120
> diff -u -p -u -r1.120 vfs_cache.c
> --- kern/vfs_cache.c	18 Mar 2017 22:36:56 -0000	1.120
> +++ kern/vfs_cache.c	10 Sep 2019 23:20:48 -0000
> @@ -94,13 +94,13 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,
>  * containing name.
>  *
>  * For simplicity (and economy of storage), names longer than
> - * a maximum length of NCHNAMLEN are not cached; they occur
> - * infrequently in any case, and are almost never of interest.
> + * a maximum length of NCHNAMLEN are stored in non-pooled storage.
>  *
>  * Upon reaching the last segment of a path, if the reference
>  * is for DELETE, or NOCACHE is set (rewrite), and the
>  * name is located in the cache, it will be dropped.
>  */
> +#define NCHNAMLEN	30
> 
> /*
>  * Cache entry lifetime:
> @@ -589,7 +589,7 @@ cache_lookup(struct vnode *dvp, const ch
> 
> 	cpup = curcpu()->ci_data.cpu_nch;
> 	mutex_enter(&cpup->cpu_lock);
> -	if (__predict_false(namelen > NCHNAMLEN)) {
> +	if (__predict_false(namelen > USHRT_MAX)) {
> 		SDT_PROBE(vfs, namecache, lookup, toolong, dvp,
> 		    name, namelen, 0, 0);
> 		COUNT(cpup, ncs_long);
> @@ -703,7 +703,7 @@ cache_lookup_raw(struct vnode *dvp, cons
> 
> 	cpup = curcpu()->ci_data.cpu_nch;
> 	mutex_enter(&cpup->cpu_lock);
> -	if (__predict_false(namelen > NCHNAMLEN)) {
> +	if (__predict_false(namelen > USHRT_MAX)) {
> 		COUNT(cpup, ncs_long);
> 		mutex_exit(&cpup->cpu_lock);
> 		/* found nothing */
> @@ -868,7 +868,7 @@ cache_enter(struct vnode *dvp, struct vn
> 
> 	/* First, check whether we can/should add a cache entry. */
> 	if ((cnflags & MAKEENTRY) == 0 ||
> -	    __predict_false(namelen > NCHNAMLEN || !doingcache)) {
> +	    __predict_false(namelen > USHRT_MAX || !doingcache)) {
> 		SDT_PROBE(vfs, namecache, enter, toolong, vp, name, namelen,
> 		    0, 0);
> 		return;
> @@ -882,7 +882,11 @@ cache_enter(struct vnode *dvp, struct vn
> 		mutex_exit(namecache_lock);
> 	}
> 
> -	ncp = pool_cache_get(namecache_cache, PR_WAITOK);
> +	if (namelen > NCHNAMLEN) {
> +		ncp = kmem_alloc(sizeof(*ncp) + namelen, KM_SLEEP);
> +		cache_ctor(NULL, ncp, 0);
> +	} else
> +		ncp = pool_cache_get(namecache_cache, PR_WAITOK);
> 	mutex_enter(namecache_lock);
> 	numcache++;
> 
> @@ -919,7 +923,7 @@ cache_enter(struct vnode *dvp, struct vn
> 		ncp->nc_vlist.le_prev = NULL;
> 		ncp->nc_vlist.le_next = NULL;
> 	}
> -	KASSERT(namelen <= NCHNAMLEN);
> +	KASSERT(namelen <= USHRT_MAX);
> 	ncp->nc_nlen = namelen;
> 	memcpy(ncp->nc_name, name, (unsigned)ncp->nc_nlen);
> 	TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
> @@ -970,7 +974,7 @@ nchinit(void)
> 	int error;
> 
> 	TAILQ_INIT(&nclruhead);
> -	namecache_cache = pool_cache_init(sizeof(struct namecache),
> +	namecache_cache = pool_cache_init(sizeof(struct namecache) + NCHNAMLEN,
> 	    coherency_unit, 0, 0, "ncache", NULL, IPL_NONE, cache_ctor,
> 	    cache_dtor, NULL);
> 	KASSERT(namecache_cache != NULL);
> @@ -1248,7 +1252,11 @@ cache_reclaim(void)
> 			LIST_REMOVE(ncp, nc_hash);
> 			ncp->nc_hash.le_prev = NULL;
> 		}
> -		pool_cache_put(namecache_cache, ncp);
> +		if (ncp->nc_nlen > NCHNAMLEN) {
> +			cache_dtor(NULL, ncp);
> +			kmem_free(ncp, sizeof(*ncp) + ncp->nc_nlen);
> +		} else
> +			pool_cache_put(namecache_cache, ncp);
> 		ncp = next;
> 	}
> 	cache_unlock_cpus();

-- thorpej



Home | Main Index | Thread Index | Old Index