Subject: Re: kern/31944 - Fix to reduce tmpfs memory usage
To: None <kern-bug-people@netbsd.org, gnats-admin@netbsd.org,>
From: Julio M. Merino Vidal <jmmv84@gmail.com>
List: netbsd-bugs
Date: 04/14/2006 17:55:02
The following reply was made to PR kern/31944; it has been noted by GNATS.

From: "Julio M. Merino Vidal" <jmmv84@gmail.com>
To: tech-kern@netbsd.org, gnats-bugs@netbsd.org
Cc: 
Subject: Re: kern/31944 - Fix to reduce tmpfs memory usage
Date: Fri, 14 Apr 2006 19:51:14 +0200

 ------=_Part_7760_25248237.1145037074927
 Content-Type: text/plain; charset=ISO-8859-1
 Content-Transfer-Encoding: quoted-printable
 Content-Disposition: inline
 
 Hello,
 
 PR kern/31944 lists three issues that make tmpfs consume too much
 memory.  The attached patch addresses the first of them.  It does the
 following:
 
 - Only keep the node identifier/generation number when deleting a node.
   The size of tmpfs_nid is much smaller than that of tmpfs_node.
 - Avoid keeping nodes that were initialized but then discarded due to some
   error.  See the keepit parameter to the tmpfs_free_node function.
 
 I have read the replies to that PR but the proposed solutions seem to be
 "not right".  Also, one question that arises... can there be two live files=
  in a
 file system with the same node number?  If not, all those solutions do not
 seem to address this...
 
 tmpfs passes the regression test suite after the changes.
 
 Please raise your comments.
 
 --
 Julio M. Merino Vidal <jmmv84@gmail.com>
 The Julipedia - http://julipedia.blogspot.com/
 
 ------=_Part_7760_25248237.1145037074927
 Content-Type: text/x-patch; name=patch.diff; charset=us-ascii
 Content-Transfer-Encoding: 7bit
 X-Attachment-Id: f_em0tgium
 Content-Disposition: attachment; filename="patch.diff"
 
 Index: tmpfs.h
 ===================================================================
 RCS file: /cvsroot/src/sys/fs/tmpfs/tmpfs.h,v
 retrieving revision 1.18
 diff -u -p -r1.18 tmpfs.h
 --- tmpfs.h	31 Mar 2006 20:27:49 -0000	1.18
 +++ tmpfs.h	14 Apr 2006 17:42:54 -0000
 @@ -93,6 +93,25 @@ TAILQ_HEAD(tmpfs_dir, tmpfs_dirent);
  /* --------------------------------------------------------------------- */
  
  /*
 + * Node identifier (node number/generation number pair).
 + *
 + * This information is kept separate from the tmpfs_node structure so
 + * that it can easily be left alive when an existing node is deleted.
 + * This is because the system needs to reuse node numbers when it reaches
 + * the node limit and therefore it has to assign a different generation
 + * number.
 + */
 +struct tmpfs_nid {
 +	LIST_ENTRY(tmpfs_nid)	tni_entries;
 +
 +	ino_t			tni_id;
 +	unsigned long		tni_gen;
 +};
 +LIST_HEAD(tmpfs_nid_list, tmpfs_nid);
 +
 +/* --------------------------------------------------------------------- */
 +
 +/*
   * Internal representation of a tmpfs file system node.
   *
   * This structure is splitted in two parts: one holds attributes common
 @@ -112,8 +131,8 @@ struct tmpfs_node {
  	 * and faster, as we do not need to convert between two types. */
  	enum vtype		tn_type;
  
 -	/* Node identifier. */
 -	ino_t			tn_id;
 +	/* Node identifier and generation number. */
 +	struct tmpfs_nid *	tn_nid;
  
  	/* Node's internal status.  This is used by several file system
  	 * operations to do modifications to the node in a delayed
 @@ -137,7 +156,6 @@ struct tmpfs_node {
  	struct timespec		tn_mtime;
  	struct timespec		tn_ctime;
  	struct timespec		tn_birthtime;
 -	unsigned long		tn_gen;
  
  	/* Head of byte-level lock list (used by tmpfs_advlock). */
  	struct lockf *		tn_lockf;
 @@ -258,21 +276,23 @@ struct tmpfs_mount {
  	 * i.e., they refer to existing files.  The available list contains
  	 * all nodes that are currently available for use by new files.
  	 * Nodes must be kept in this list (instead of deleting them)
 -	 * because we need to keep track of their generation number (tn_gen
 -	 * field).
 +	 * because we need to keep track of their generation number and
 +	 * because we do not want to use the same node number for two live
 +	 * files.
  	 *
  	 * Note that nodes are lazily allocated: if the available list is
  	 * empty and we have enough space to create more nodes, they will be
  	 * created and inserted in the used list.  Once these are released,
  	 * they will go into the available list, remaining alive until the
  	 * file system is unmounted. */
 +	struct tmpfs_nid_list	tm_nodes_avail;
  	struct tmpfs_node_list	tm_nodes_used;
 -	struct tmpfs_node_list	tm_nodes_avail;
  
  	/* Pools used to store file system meta data.  These are not shared
  	 * across several instances of tmpfs for the reasons described in
  	 * tmpfs_pool.c. */
  	struct tmpfs_pool	tm_dirent_pool;
 +	struct tmpfs_pool	tm_nid_pool;
  	struct tmpfs_pool	tm_node_pool;
  	struct tmpfs_str_pool	tm_str_pool;
  };
 @@ -300,7 +320,7 @@ struct tmpfs_fid {
  int	tmpfs_alloc_node(struct tmpfs_mount *, enum vtype,
  	    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *,
  	    char *, dev_t, struct proc *, struct tmpfs_node **);
 -void	tmpfs_free_node(struct tmpfs_mount *, struct tmpfs_node *);
 +void	tmpfs_free_node(struct tmpfs_mount *, struct tmpfs_node *, boolean_t);
  int	tmpfs_alloc_dirent(struct tmpfs_mount *, struct tmpfs_node *,
  	    const char *, uint16_t, struct tmpfs_dirent **);
  void	tmpfs_free_dirent(struct tmpfs_mount *, struct tmpfs_dirent *,
 Index: tmpfs_subr.c
 ===================================================================
 RCS file: /cvsroot/src/sys/fs/tmpfs/tmpfs_subr.c,v
 retrieving revision 1.18
 diff -u -p -r1.18 tmpfs_subr.c
 --- tmpfs_subr.c	16 Feb 2006 14:57:50 -0000	1.18
 +++ tmpfs_subr.c	14 Apr 2006 17:42:54 -0000
 @@ -93,6 +93,7 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp
      uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
      char *target, dev_t rdev, struct proc *p, struct tmpfs_node **node)
  {
 +	struct tmpfs_nid *nid;
  	struct tmpfs_node *nnode;
  
  	/* If the root directory of the 'tmp' file system is not yet
 @@ -104,24 +105,35 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp
  
  	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);
  
 -	nnode = NULL;
 +	nnode = (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
 +	if (nnode == NULL)
 +		return ENOSPC;
 +	KASSERT(nnode != NULL);
 +
 +	nid = NULL;
  	if (LIST_EMPTY(&tmp->tm_nodes_avail)) {
  		KASSERT(tmp->tm_nodes_last <= tmp->tm_nodes_max);
 -		if (tmp->tm_nodes_last == tmp->tm_nodes_max)
 +		if (tmp->tm_nodes_last == tmp->tm_nodes_max) {
 +			TMPFS_POOL_PUT(&tmp->tm_node_pool, nnode);
  			return ENOSPC;
 +		}
  
 -		nnode =
 -		    (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
 -		if (nnode == NULL)
 +		nid = (struct tmpfs_nid *)TMPFS_POOL_GET(&tmp->tm_nid_pool, 0);
 +		if (nid == NULL) {
 +			TMPFS_POOL_PUT(&tmp->tm_node_pool, nnode);
  			return ENOSPC;
 -		nnode->tn_id = tmp->tm_nodes_last++;
 -		nnode->tn_gen = 0;
 +		}
 +
 +		nid->tni_id = tmp->tm_nodes_last++;
 +		nid->tni_gen = 0;
  	} else {
 -		nnode = LIST_FIRST(&tmp->tm_nodes_avail);
 -		LIST_REMOVE(nnode, tn_entries);
 -		nnode->tn_gen++;
 +		nid = LIST_FIRST(&tmp->tm_nodes_avail);
 +		LIST_REMOVE(nid, tni_entries);
 +		nid->tni_gen++;
  	}
 -	KASSERT(nnode != NULL);
 +	KASSERT(nid != NULL);
 +	nnode->tn_nid = nid;
 +
  	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
  
  	/* Generic initialization. */
 @@ -168,7 +180,7 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp
  		    tmpfs_str_pool_get(&tmp->tm_str_pool, nnode->tn_size, 0);
  		if (nnode->tn_spec.tn_lnk.tn_link == NULL) {
  			nnode->tn_type = VNON;
 -			tmpfs_free_node(tmp, nnode);
 +			tmpfs_free_node(tmp, nnode, FALSE);
  			return ENOSPC;
  		}
  		memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size);
 @@ -202,17 +214,17 @@ tmpfs_alloc_node(struct tmpfs_mount *tmp
   * to the user are the removal of empty directories and the deletion of
   * individual files.
   *
 - * Note that nodes are not really deleted; in fact, when a node has been
 - * allocated, it cannot be deleted during the whole life of the file
 - * system.  Instead, they are moved to the available list and remain there
 - * until reused.
 + * If keepit is TRUE, the node is not really deleted because, when a node
 + * has been allocated, it cannot be deleted during the whole life of the
 + * file system.  Instead, its identifier is moved to the available list
 + * and remain there in case it needs to be reused.
   */
  void
 -tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
 +tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node,
 +    boolean_t keepit)
  {
 -	ino_t id;
 -	unsigned long gen;
  	size_t pages;
 +	struct tmpfs_nid *nid;
  
  	switch (node->tn_type) {
  	case VNON:
 @@ -253,13 +265,12 @@ tmpfs_free_node(struct tmpfs_mount *tmp,
  	tmp->tm_pages_used -= pages;
  
  	LIST_REMOVE(node, tn_entries);
 -	id = node->tn_id;
 -	gen = node->tn_gen;
 -	memset(node, 0, sizeof(struct tmpfs_node));
 -	node->tn_id = id;
 -	node->tn_type = VNON;
 -	node->tn_gen = gen;
 -	LIST_INSERT_HEAD(&tmp->tm_nodes_avail, node, tn_entries);
 +	nid = node->tn_nid;
 +	if (keepit)
 +		LIST_INSERT_HEAD(&tmp->tm_nodes_avail, nid, tni_entries);
 +	else
 +		TMPFS_POOL_PUT(&tmp->tm_nid_pool, nid);
 +	TMPFS_POOL_PUT(&tmp->tm_node_pool, node);
  }
  
  /* --------------------------------------------------------------------- */
 @@ -505,7 +516,7 @@ tmpfs_alloc_file(struct vnode *dvp, stru
  	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
  	    &de);
  	if (error != 0) {
 -		tmpfs_free_node(tmp, node);
 +		tmpfs_free_node(tmp, node, FALSE);
  		goto out;
  	}
  
 @@ -513,7 +524,7 @@ tmpfs_alloc_file(struct vnode *dvp, stru
  	error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
  	if (error != 0) {
  		tmpfs_free_dirent(tmp, de, TRUE);
 -		tmpfs_free_node(tmp, node);
 +		tmpfs_free_node(tmp, node, FALSE);
  		goto out;
  	}
  
 @@ -637,7 +648,7 @@ tmpfs_dir_getdotdent(struct tmpfs_node *
  	TMPFS_VALIDATE_DIR(node);
  	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
  
 -	dent.d_fileno = node->tn_id;
 +	dent.d_fileno = node->tn_nid->tni_id;
  	dent.d_type = DT_DIR;
  	dent.d_namlen = 1;
  	dent.d_name[0] = '.';
 @@ -675,7 +686,7 @@ tmpfs_dir_getdotdotdent(struct tmpfs_nod
  	TMPFS_VALIDATE_DIR(node);
  	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
  
 -	dent.d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
 +	dent.d_fileno = node->tn_spec.tn_dir.tn_parent->tn_nid->tni_id;
  	dent.d_type = DT_DIR;
  	dent.d_namlen = 2;
  	dent.d_name[0] = '.';
 @@ -767,7 +778,7 @@ tmpfs_dir_getdents(struct tmpfs_node *no
  
  		/* Create a dirent structure representing the current
  		 * tmpfs_node and fill it. */
 -		d.d_fileno = de->td_node->tn_id;
 +		d.d_fileno = de->td_node->tn_nid->tni_id;
  		switch (de->td_node->tn_type) {
  		case VBLK:
  			d.d_type = DT_BLK;
 Index: tmpfs_vfsops.c
 ===================================================================
 RCS file: /cvsroot/src/sys/fs/tmpfs/tmpfs_vfsops.c,v
 retrieving revision 1.11
 diff -u -p -r1.11 tmpfs_vfsops.c
 --- tmpfs_vfsops.c	16 Feb 2006 14:57:50 -0000	1.11
 +++ tmpfs_vfsops.c	14 Apr 2006 17:42:55 -0000
 @@ -168,6 +168,8 @@ tmpfs_mount(struct mount *mp, const char
  	tmp->tm_pages_used = 0;
  	tmpfs_pool_init(&tmp->tm_dirent_pool, sizeof(struct tmpfs_dirent),
  	    "dirent", tmp);
 +	tmpfs_pool_init(&tmp->tm_nid_pool, sizeof(struct tmpfs_nid),
 +	    "nid", tmp);
  	tmpfs_pool_init(&tmp->tm_node_pool, sizeof(struct tmpfs_node),
  	    "node", tmp);
  	tmpfs_str_pool_init(&tmp->tm_str_pool, tmp);
 @@ -206,6 +208,7 @@ tmpfs_unmount(struct mount *mp, int mntf
  	int error;
  	int flags = 0;
  	struct tmpfs_mount *tmp;
 +	struct tmpfs_nid *nid;
  	struct tmpfs_node *node;
  
  	/* Handle forced unmounts. */
 @@ -243,20 +246,21 @@ tmpfs_unmount(struct mount *mp, int mntf
  		}
  
  		next = LIST_NEXT(node, tn_entries);
 -		tmpfs_free_node(tmp, node);
 +		tmpfs_free_node(tmp, node, FALSE);
  		node = next;
  	}
 -	node = LIST_FIRST(&tmp->tm_nodes_avail);
 -	while (node != NULL) {
 -		struct tmpfs_node *next;
 -
 -		next = LIST_NEXT(node, tn_entries);
 -		LIST_REMOVE(node, tn_entries);
 -		TMPFS_POOL_PUT(&tmp->tm_node_pool, node);
 -		node = next;
 +	nid = LIST_FIRST(&tmp->tm_nodes_avail);
 +	while (nid != NULL) {
 +		struct tmpfs_nid *next;
 +
 +		next = LIST_NEXT(nid, tni_entries);
 +		LIST_REMOVE(nid, tni_entries);
 +		TMPFS_POOL_PUT(&tmp->tm_nid_pool, nid);
 +		nid = next;
  	}
  
  	tmpfs_pool_destroy(&tmp->tm_dirent_pool);
 +	tmpfs_pool_destroy(&tmp->tm_nid_pool);
  	tmpfs_pool_destroy(&tmp->tm_node_pool);
  	tmpfs_str_pool_destroy(&tmp->tm_str_pool);
  
 @@ -320,8 +324,8 @@ tmpfs_fhtovp(struct mount *mp, struct fi
  
  	found = FALSE;
  	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
 -		if (node->tn_id == tfhp->tf_id &&
 -		    node->tn_gen == tfhp->tf_gen) {
 +		if (node->tn_nid->tni_id == tfhp->tf_id &&
 +		    node->tn_nid->tni_gen == tfhp->tf_gen) {
  			found = TRUE;
  			break;
  		}
 @@ -342,8 +346,8 @@ tmpfs_vptofh(struct vnode *vp, struct fi
  	node = VP_TO_TMPFS_NODE(vp);
  
  	tfhp->tf_len = sizeof(struct tmpfs_fid);
 -	tfhp->tf_id = node->tn_id;
 -	tfhp->tf_gen = node->tn_gen;
 +	tfhp->tf_id = node->tn_nid->tni_id;
 +	tfhp->tf_gen = node->tn_nid->tni_gen;
  
  	return 0;
  }
 @@ -356,6 +360,7 @@ tmpfs_statvfs(struct mount *mp, struct s
  {
  	fsfilcnt_t freenodes, usednodes;
  	struct tmpfs_mount *tmp;
 +	struct tmpfs_nid *dummyid;
  	struct tmpfs_node *dummy;
  
  	tmp = VFS_TO_TMPFS(mp);
 @@ -368,7 +373,7 @@ tmpfs_statvfs(struct mount *mp, struct s
  
  	freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_last,
  	    TMPFS_PAGES_AVAIL(tmp) * PAGE_SIZE / sizeof(struct tmpfs_node));
 -	LIST_FOREACH(dummy, &tmp->tm_nodes_avail, tn_entries)
 +	LIST_FOREACH(dummyid, &tmp->tm_nodes_avail, tni_entries)
  		freenodes++;
  
  	usednodes = 0;
 Index: tmpfs_vnops.c
 ===================================================================
 RCS file: /cvsroot/src/sys/fs/tmpfs/tmpfs_vnops.c,v
 retrieving revision 1.22
 diff -u -p -r1.22 tmpfs_vnops.c
 --- tmpfs_vnops.c	21 Feb 2006 03:19:45 -0000	1.22
 +++ tmpfs_vnops.c	14 Apr 2006 17:42:55 -0000
 @@ -430,14 +430,14 @@ tmpfs_getattr(void *v)
  	vap->va_uid = node->tn_uid;
  	vap->va_gid = node->tn_gid;
  	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
 -	vap->va_fileid = node->tn_id;
 +	vap->va_fileid = node->tn_nid->tni_id;
  	vap->va_size = node->tn_size;
  	vap->va_blocksize = PAGE_SIZE;
  	vap->va_atime = node->tn_atime;
  	vap->va_mtime = node->tn_mtime;
  	vap->va_ctime = node->tn_ctime;
  	vap->va_birthtime = node->tn_birthtime;
 -	vap->va_gen = node->tn_gen;
 +	vap->va_gen = node->tn_nid->tni_gen;
  	vap->va_flags = node->tn_flags;
  	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
  		node->tn_spec.tn_dev.tn_rdev : VNOVAL;
 @@ -1266,7 +1266,7 @@ tmpfs_reclaim(void *v)
  	 * we must free its associated data structures (now that the vnode
  	 * is being reclaimed). */
  	if (node->tn_links == 0)
 -		tmpfs_free_node(tmp, node);
 +		tmpfs_free_node(tmp, node, TRUE);
  
  	KASSERT(!VOP_ISLOCKED(vp));
  	KASSERT(vp->v_data == NULL);
 
 
 ------=_Part_7760_25248237.1145037074927--