Subject: kernfs bug fixes, PR 24786
To: None <tech-kern@netbsd.org>
From: Christian Limpach <chris@pin.lu>
List: tech-kern
Date: 04/27/2004 17:07:30
--gBBFr7Ir9EOA20Yy
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Hello,

I'd like to add some additional nodes in /kern/xen for the Xen port.
While doing this, I've fixed PR 24786 and discovered and then fixed
the problem that stat and readdir return different inode numbers.


PR 24786:
This is fixed by patch kernfs_pr24786.patch
It fixes the problem and removes some code duplication.  The for loops
are actually not used unless someone would add additional static entries
in the ipsecsa_targets/ipsecsp_targets entries.


inode mismatch between stat and readdir:
This is fixed by patch kernfs_inodes.patch
Before patch logfile: kernfs_inodes.txt
After patch logfile: kernfs_inodes-fixed.txt

The cause of the problem is that the mapping from entry to fileno is
done in kernfs_subr.c:kernfs_allocvp and in kernfs_vnops.c:kernfs_readdir
and the mapping doesn't always give the same result.  I've changed
kernfs_readdir to use kernfs_allocvp to do the mapping.  This needed all
the kernfs_nodes to have a valid kfs_kt member to avoid a deadlock (see
check in kernfs_setdirentfileno).  It also required a change to the
KERNFS_FILENO mapping since it returned random results for kern_targets
not from the kern_targets array.
I've also changed the mapping such that only entries from the kern_targets
array have inodes with all bits higher than 6 clear.  I know this change
is not required but needed later when dynamically adding additional
entries to the root directory.


duplicate code removal after above patch:
Patch: kernfs_dupcode.patch
Since all kernfs_nodes have a valid kfs_kt member after the
kernfs_inodes.patch, the code in kernfs_subr.c:kernfs_allocvp which sets
the mode can be removed since the information is available in the
kern_target.


Unless there's something wrong with the changes, I'd like to commit this
in a couple of days.

    christian


--gBBFr7Ir9EOA20Yy
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="kernfs_pr24786.patch"

Index: miscfs/kernfs/kernfs_vnops.c
===================================================================
RCS file: /cvs/netbsd/src/sys/miscfs/kernfs/kernfs_vnops.c,v
retrieving revision 1.98
diff -p -u -r1.98 kernfs_vnops.c
--- miscfs/kernfs/kernfs_vnops.c	27 Sep 2003 13:29:02 -0000	1.98
+++ miscfs/kernfs/kernfs_vnops.c	19 Apr 2004 15:38:07 -0000
@@ -477,18 +477,16 @@ kernfs_lookup(v)
 
 #ifdef IPSEC
 	case KFSipsecsadir:
-		for (i = 0; i < nipsecsa_targets; i++) {
+		if (cnp->cn_flags & ISDOTDOT) {
+			kt = &kern_targets[0];
+			goto found;
+		}
+
+		for (i = 2; i < nipsecsa_targets; i++) {
 			kt = &ipsecsa_targets[i];
 			if (cnp->cn_namelen == kt->kt_namlen &&
-			    memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0) {
-				error = kernfs_allocvp(dvp->v_mount, vpp,
-				    kt->kt_tag, kt, 0);
-				if ((error == 0) && wantpunlock) {
-					VOP_UNLOCK(dvp, 0);
-					cnp->cn_flags |= PDIRUNLOCK;
-				}
-				return (error);
-			}
+			    memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0)
+				goto found;
 		}
 
 		ep = NULL;
@@ -504,18 +502,16 @@ kernfs_lookup(v)
 		return (error);
 
 	case KFSipsecspdir:
-		for (i = 0; i < nipsecsp_targets; i++) {
+		if (cnp->cn_flags & ISDOTDOT) {
+			kt = &kern_targets[0];
+			goto found;
+		}
+
+		for (i = 2; i < nipsecsp_targets; i++) {
 			kt = &ipsecsp_targets[i];
 			if (cnp->cn_namelen == kt->kt_namlen &&
-			    memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0) {
-				error = kernfs_allocvp(dvp->v_mount, vpp,
-				    kt->kt_tag, kt, 0);
-				if ((error == 0) && wantpunlock) {
-					VOP_UNLOCK(dvp, 0);
-					cnp->cn_flags |= PDIRUNLOCK;
-				}
-				return (error);
-			}
+			    memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0)
+				goto found;
 		}
 
 		ep = NULL;

--gBBFr7Ir9EOA20Yy
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="kernfs_inodes.patch"

--- kernfs_vnops.c	2004/04/19 16:08:20	1.1
+++ kernfs_vnops.c	2004/04/19 16:21:37
@@ -79,6 +79,7 @@
 
 #define	READ_MODE	(S_IRUSR|S_IRGRP|S_IROTH)
 #define	WRITE_MODE	(S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH)
+#define	UREAD_MODE	(S_IRUSR)
 #define DIR_MODE	(S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
 #define UDIR_MODE	(S_IRUSR|S_IXUSR)
 
@@ -125,6 +126,10 @@
      { DT_DIR, N("."),         0,            KFSipsecspdir,  VDIR, DIR_MODE   },
      { DT_DIR, N(".."),        0,            KFSkern,        VDIR, DIR_MODE   },
 };
+const struct kern_target ipsecsa_kt =
+     { DT_DIR, N(""),          0,            KFSipsecsa,     VREG, UREAD_MODE };
+const struct kern_target ipsecsp_kt =
+     { DT_DIR, N(""),          0,            KFSipsecsp,     VREG, UREAD_MODE };
 #endif
 #undef N
 int nkern_targets = sizeof(kern_targets) / sizeof(kern_targets[0]);
@@ -494,7 +499,7 @@
 		if (!ep || *ep || ep == pname)
 			break;
 
-		error = kernfs_allocvp(dvp->v_mount, vpp, KFSipsecsa, NULL, id);
+		error = kernfs_allocvp(dvp->v_mount, vpp, KFSipsecsa, &ipsecsa_kt, id);
 		if ((error == 0) && wantpunlock) {
 			VOP_UNLOCK(dvp, 0);
 			cnp->cn_flags |= PDIRUNLOCK;
@@ -519,7 +524,7 @@
 		if (!ep || *ep || ep == pname)
 			break;
 
-		error = kernfs_allocvp(dvp->v_mount, vpp, KFSipsecsp, NULL, id);
+		error = kernfs_allocvp(dvp->v_mount, vpp, KFSipsecsp, &ipsecsp_kt, id);
 		if ((error == 0) && wantpunlock) {
 			VOP_UNLOCK(dvp, 0);
 			cnp->cn_flags |= PDIRUNLOCK;
@@ -789,6 +794,59 @@
 	return (kernfs_xwrite(kfs, strbuf, xlen));
 }
 
+static int
+kernfs_setdirentfileno_kt(struct dirent *d, const struct kern_target *kt,
+    u_int32_t value, struct vop_readdir_args *ap)
+{
+	struct kernfs_node *kfs;
+	struct vnode *vp;
+	int error;
+
+	if ((error = kernfs_allocvp(ap->a_vp->v_mount, &vp, kt->kt_tag, kt,
+	    value)) != 0)
+		return error;
+	if (kt->kt_tag == KFSdevice) {
+		struct vattr va;
+		if ((error = VOP_GETATTR(vp, &va, ap->a_cred,
+		    ap->a_uio->uio_segflg == UIO_USERSPACE ?
+		    ap->a_uio->uio_procp : &proc0)) != 0)
+			return (error);
+		d->d_fileno = va.va_fileid;
+	} else {
+		kfs = VTOKERN(vp);
+		d->d_fileno = kfs->kfs_fileno;
+	}
+	vput(vp);
+	return 0;
+}
+
+static int
+kernfs_setdirentfileno(struct dirent *d, off_t entry,
+    struct kernfs_node *thisdir_kfs, const struct kern_target *parent_kt,
+    const struct kern_target *kt, struct vop_readdir_args *ap)
+{
+	const struct kern_target *ikt;
+	int error;
+
+	switch (entry) {
+	case 0:
+		d->d_fileno = thisdir_kfs->kfs_fileno;
+		return 0;
+	case 1:
+		ikt = parent_kt;
+		break;
+	default:
+		ikt = kt;
+		break;
+	}
+	if (ikt != thisdir_kfs->kfs_kt) {
+		if ((error = kernfs_setdirentfileno_kt(d, ikt, 0, ap)) != 0)
+			return error;
+	} else
+		d->d_fileno = thisdir_kfs->kfs_fileno;
+	return 0;
+}
+
 int
 kernfs_readdir(v)
 	void *v;
@@ -849,11 +907,9 @@
 					continue;
 			}
 			d.d_namlen = kt->kt_namlen;
-			if (i < 2)
-				d.d_fileno = KERNFS_FILENO(&kern_targets[0],
-				    kern_targets[0].kt_tag, 0);
-			else
-				d.d_fileno = KERNFS_FILENO(kt, kt->kt_tag, 0);
+			if ((error = kernfs_setdirentfileno(&d, i, kfs,
+			    &kern_targets[0], kt, ap)) != 0)
+				break;
 			memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
 			d.d_type = kt->kt_type;
 			if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
@@ -923,7 +979,9 @@
 		for (; i < nipsecsa_targets && uio->uio_resid >= UIO_MX; i++) {
 			kt = &ipsecsa_targets[i];
 			d.d_namlen = kt->kt_namlen;
-			d.d_fileno = KERNFS_FILENO(kt, kt->kt_tag, 0);
+			if ((error = kernfs_setdirentfileno(&d, i, kfs,
+			    &kern_targets[0], kt, ap)) != 0)
+				break;
 			memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
 			d.d_type = kt->kt_type;
 			if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
@@ -950,8 +1008,9 @@
 				continue;
 			if (uio->uio_resid < UIO_MX)
 				break;
-			d.d_fileno = KERNFS_FILENO(kfs->kfs_kt, kfs->kfs_type,
-			    kfs->kfs_cookie);
+			if ((error = kernfs_setdirentfileno_kt(&d, &ipsecsa_kt,
+			    sav->spi, ap)) != 0)
+				break;
 			d.d_namlen = snprintf(d.d_name, sizeof(d.d_name),
 			    "%u", ntohl(sav->spi));
 			d.d_type = DT_REG;
@@ -985,7 +1044,9 @@
 		for (; i < nipsecsp_targets && uio->uio_resid >= UIO_MX; i++) {
 			kt = &ipsecsp_targets[i];
 			d.d_namlen = kt->kt_namlen;
-			d.d_fileno = KERNFS_FILENO(kt, kt->kt_tag, 0);
+			if ((error = kernfs_setdirentfileno(&d, i, kfs,
+			    &kern_targets[0], kt, ap)) != 0)
+				break;
 			memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
 			d.d_type = kt->kt_type;
 			if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
@@ -1002,8 +1063,9 @@
 		TAILQ_FOREACH(sp, &sptailq, tailq) {
 			if (uio->uio_resid < UIO_MX)
 				break;
-			d.d_fileno = KERNFS_FILENO(kfs->kfs_kt, kfs->kfs_type,
-			    kfs->kfs_cookie);
+			if ((error = kernfs_setdirentfileno_kt(&d, &ipsecsp_kt,
+			    sp->id, ap)) != 0)
+				break;
 			d.d_namlen = snprintf(d.d_name, sizeof(d.d_name),
 			    "%u", sp->id);
 			d.d_type = DT_REG;
--- kernfs.h	2004/04/19 16:24:55	1.1
+++ kernfs.h	2004/04/19 16:25:51
@@ -93,8 +93,9 @@
 #define UIO_MX	32
 
 #define KERNFS_FILENO(kt, typ, cookie) \
-	((kt) ? 2 + ((kt) - &kern_targets[0]) \
-	      : (((cookie) << 6) | ((typ) + nkern_targets)))
+	((kt >= &kern_targets[0] && kt < &kern_targets[nkern_targets]) ? \
+	    2 + ((kt) - &kern_targets[0]) \
+	      : (((cookie + 1) << 6) | (typ)))
 
 #define VFSTOKERNFS(mp)	((struct kernfs_mount *)((mp)->mnt_data))
 #define	VTOKERN(vp)	((struct kernfs_node *)(vp)->v_data)

--gBBFr7Ir9EOA20Yy
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="kernfs_inodes.txt"

# ls -lai
total 18
   2 dr-xr-xr-x   4 root  wheel           512 Apr 19 19:55 .
   2 drwxr-xr-x  29 root  wheel           512 Apr  9 01:03 ..
   4 -r--r--r--   1 root  wheel            11 Apr 19 19:53 boottime
   5 -r--r--r--   1 root  wheel           236 Apr 19 19:55 copyright
   6 -rw-r--r--   1 root  wheel             9 Apr 19 19:55 hostname
   7 -r--r--r--   1 root  wheel             4 Apr 19 19:55 hz
   8 dr-x------   2 root  wheel           512 Apr 19 19:55 ipsecsa
   9 dr-x------   2 root  wheel           512 Apr 19 19:55 ipsecsp
  10 -r--r--r--   1 root  wheel            17 Apr 19 19:55 loadavg
  11 -r--r--r--   1 root  wheel         16368 Apr 19 19:55 msgbuf
  12 -r--r--r--   1 root  wheel             5 Apr 19 19:55 pagesize
  13 -r--r--r--   1 root  wheel             6 Apr 19 19:55 physmem
 251 brw-r-----   1 root  operator      0, 13 Nov  9 00:56 rootdev
1664 crw-r-----   1 root  operator      3, 13 Nov  9 00:56 rrootdev
  16 -r--r--r--   1 root  wheel            18 Apr 19 19:55 time
  17 -r--r--r--   1 root  wheel           115 Apr 19 19:55 version
# ~chris/tmp/rd
ino 2 name .
ino 2 name ..
ino 4 name boottime
ino 5 name copyright
ino 6 name hostname
ino 7 name hz
ino 8 name ipsecsa
ino 9 name ipsecsp
ino 10 name loadavg
ino 11 name msgbuf
ino 12 name pagesize
ino 13 name physmem
ino 14 name rootdev
ino 15 name rrootdev
ino 16 name time
ino 17 name version
# cd ipsecsp
# ls -lai
total 1
   9 dr-x------  2 root  wheel  512 Apr 19 19:56 .
   2 dr-xr-xr-x  4 root  wheel  512 Apr 19 19:56 ..
 861 -r--------  1 root  wheel   96 Apr 19 19:56 16384
 925 -r--------  1 root  wheel   96 Apr 19 19:56 16385
 989 -r--------  1 root  wheel   96 Apr 19 19:56 16386
1053 -r--------  1 root  wheel   96 Apr 19 19:56 16387
1117 -r--------  1 root  wheel   96 Apr 19 19:56 16388
1181 -r--------  1 root  wheel   96 Apr 19 19:56 16389
1245 -r--------  1 root  wheel   96 Apr 19 19:56 16390
1309 -r--------  1 root  wheel   96 Apr 19 19:56 16391
1373 -r--------  1 root  wheel   96 Apr 19 19:56 16392
1437 -r--------  1 root  wheel   96 Apr 19 19:56 16393
1501 -r--------  1 root  wheel   96 Apr 19 19:56 16394
1565 -r--------  1 root  wheel   96 Apr 19 19:56 16395
1629 -r--------  1 root  wheel   96 Apr 19 19:56 16396
1693 -r--------  1 root  wheel   96 Apr 19 19:56 16397
1757 -r--------  1 root  wheel   96 Apr 19 19:56 16398
# ~chris/tmp/rd
ino 1431655786 name .
ino 1431655787 name ..
ino 9 name 16384
ino 9 name 16385
ino 9 name 16386
ino 9 name 16387
ino 9 name 16388
ino 9 name 16389
ino 9 name 16390
ino 9 name 16391
ino 9 name 16392
ino 9 name 16393
ino 9 name 16394
ino 9 name 16395
ino 9 name 16396
ino 9 name 16397
ino 9 name 16398

--gBBFr7Ir9EOA20Yy
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="kernfs_inodes-fixed.txt"

# ls -lai
total 18
   2 dr-xr-xr-x   4 root  wheel           512 Apr 19 20:23 .
   2 drwxr-xr-x  29 root  wheel           512 Apr  9 01:03 ..
   4 -r--r--r--   1 root  wheel            11 Apr 19 20:22 boottime
   5 -r--r--r--   1 root  wheel           236 Apr 19 20:23 copyright
   6 -rw-r--r--   1 root  wheel             9 Apr 19 20:23 hostname
   7 -r--r--r--   1 root  wheel             4 Apr 19 20:23 hz
   8 dr-x------   2 root  wheel           512 Apr 19 20:23 ipsecsa
   9 dr-x------   2 root  wheel           512 Apr 19 20:23 ipsecsp
  10 -r--r--r--   1 root  wheel            18 Apr 19 20:23 loadavg
  11 -r--r--r--   1 root  wheel         16368 Apr 19 20:23 msgbuf
  12 -r--r--r--   1 root  wheel             5 Apr 19 20:23 pagesize
  13 -r--r--r--   1 root  wheel             6 Apr 19 20:23 physmem
 251 brw-r-----   1 root  operator      0, 13 Nov  9 00:56 rootdev
1664 crw-r-----   1 root  operator      3, 13 Nov  9 00:56 rrootdev
  16 -r--r--r--   1 root  wheel            18 Apr 19 20:23 time
  17 -r--r--r--   1 root  wheel           115 Apr 19 20:23 version
# ~chris/tmp/rd
ino 2 name .
ino 2 name ..
ino 4 name boottime
ino 5 name copyright
ino 6 name hostname
ino 7 name hz
ino 8 name ipsecsa
ino 9 name ipsecsp
ino 10 name loadavg
ino 11 name msgbuf
ino 12 name pagesize
ino 13 name physmem
ino 251 name rootdev
ino 1664 name rrootdev
ino 16 name time
ino 17 name version
# cd ipsecsp/
# ls -lai
total 1
   9 dr-x------  2 root  wheel  512 Apr 19 20:23 .
   2 dr-xr-xr-x  4 root  wheel  512 Apr 19 20:23 ..
 909 -r--------  1 root  wheel   96 Apr 19 20:23 16384
 973 -r--------  1 root  wheel   96 Apr 19 20:23 16385
1037 -r--------  1 root  wheel   96 Apr 19 20:23 16386
1101 -r--------  1 root  wheel   96 Apr 19 20:23 16387
1165 -r--------  1 root  wheel   96 Apr 19 20:23 16388
1229 -r--------  1 root  wheel   96 Apr 19 20:23 16389
1293 -r--------  1 root  wheel   96 Apr 19 20:23 16390
1357 -r--------  1 root  wheel   96 Apr 19 20:23 16391
1421 -r--------  1 root  wheel   96 Apr 19 20:23 16392
1485 -r--------  1 root  wheel   96 Apr 19 20:23 16393
1549 -r--------  1 root  wheel   96 Apr 19 20:23 16394
1613 -r--------  1 root  wheel   96 Apr 19 20:23 16395
1677 -r--------  1 root  wheel   96 Apr 19 20:23 16396
1741 -r--------  1 root  wheel   96 Apr 19 20:23 16397
1805 -r--------  1 root  wheel   96 Apr 19 20:23 16398
# ~chris/tmp/rd
ino 9 name .
ino 2 name ..
ino 909 name 16384
ino 973 name 16385
ino 1037 name 16386
ino 1101 name 16387
ino 1165 name 16388
ino 1229 name 16389
ino 1293 name 16390
ino 1357 name 16391
ino 1421 name 16392
ino 1485 name 16393
ino 1549 name 16394
ino 1613 name 16395
ino 1677 name 16396
ino 1741 name 16397
ino 1805 name 16398

--gBBFr7Ir9EOA20Yy
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="kernfs_dupcode.patch"

Index: kernfs_subr.c
===================================================================
RCS file: /cvs/netbsd/src/sys/miscfs/kernfs/kernfs_subr.c,v
retrieving revision 1.5
diff -p -p -u -r1.5 kernfs_subr.c
--- kernfs_subr.c	27 Sep 2003 13:29:02 -0000	1.5
+++ kernfs_subr.c	19 Apr 2004 16:40:04 -0000
@@ -225,44 +225,11 @@ again:
 	kfs->kfs_fileno = KERNFS_FILENO(kt, kfs_type, kfs->kfs_cookie);
 	kfs->kfs_value = value;
 	kfs->kfs_kt = kt;
+	kfs->kfs_mode = kt->kt_mode;
+	vp->v_type = kt->kt_vtype;
 
-	switch (kfs_type) {
-	case KFSkern:	/* /kern = dr-xr-xr-x */
-		kfs->kfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
-		vp->v_type = VDIR;
+	if (kfs_type == KFSkern)
 		vp->v_flag = VROOT;
-		break;
-
-	case KFSnull:	/* /kern/?? = -r--r--r-- */
-	case KFStime:	/* /kern/time = -r--r--r-- */
-	case KFSint:	/* /kern/?? = -r--r--r-- */
-	case KFSstring:	/* /kern/?? = -r--r--r-- */
-	case KFSavenrun:	/* /kern/loadavg = -r--r--r-- */
-	case KFSmsgbuf:	/* /kern/msgbuf = -r--r--r-- */
-		kfs->kfs_mode = S_IRUSR|S_IRGRP|S_IROTH;
-		vp->v_type = VREG;
-		break;
-
-	case KFSipsecsadir:	/* /kern/ipsecsa = dr-x------ */
-	case KFSipsecspdir:	/* /kern/ipsecsp = dr-x------ */
-		kfs->kfs_mode = S_IRUSR|S_IXUSR;
-		vp->v_type = VDIR;
-		break;
-
-	case KFSipsecsa:	/* /kern/ipsecsa/N = -r-------- */
-	case KFSipsecsp:	/* /kern/ipsecsp/N = -r-------- */
-		kfs->kfs_mode = S_IRUSR;
-		vp->v_type = VREG;
-		break;
-
-	case KFShostname: /* /kern/hostname = -rw-r-r--- */
-		kfs->kfs_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH;
-		vp->v_type = VREG;
-		break;
-
-	default:
-		panic("kernfs_allocvp");
-	}
 
 	kernfs_hashins(kfs);
 	uvm_vnp_setsize(vp, 0);

--gBBFr7Ir9EOA20Yy--