Subject: I need some help adding another mount to nfs_mountroot()....
To: NetBSD Kernel Technical Discussion List <tech-kern@NetBSD.ORG>
From: Greg A. Woods <woods@weird.com>
List: tech-kern
Date: 10/06/2003 15:05:42
I've been experimenting with easier ways to set up servers for diskless
clients.

Initially I thought I might export a union mount of the server's root
filesystem onto some client-specific directory, but of course I soon
learned that union mounts don't like to contain what they're mounted
over/under and everything's a sub-dir of "/"  :-).

So, I quickly realized that a union mount of /etc to, say,
/export/client/etc would work just as well.

Except then I discovered that NFS isn't yet able to export unionfs mount
points.

But anyway that paved the way to deciding to at least try just exporting
the root filesystem as well as a unique copy of /etc for each client
(and of course a unique /var).  I really don't care about /usr/share as
I want the whole client system to be identical with the server, save of
course for /etc and /var.  (If NFS exports of union mounts worked then
/export/client/var could also be a union mount of the server's /var and
that would give decent transparency for pkg_info and perhaps other things
as well.)

Right from the start of this journey I had begun hacking on the kernel
to also request a path for /etc and to mount it at boot time (i.e. right
at the same time it mounted its root filesystem).

The following patches seem to get me most of the way -- the only problem
is that I'm getting an EACCES when trying to find the vnode for the
directory that'll be covered over by the new mount of /etc.

(I think it mostly works because commenting out the code that tries to
find the vnode for "/etc" and thus not setting vcovp->v_mountedhere gets
me a semblance of a mount point, but of course namei() can't see it
without a v_mountedhere value so it's just decorative and not functional.)

Here's what I see on the client with the following patches:

	[[ ... normal device probes ... ]]
	root on le0
	nfs_boot: trying RARP (and RPC/bootparam)
	nfs_boot: client_addr=204.92.254.3 (RARP from 204.92.254.2)
	nfs_boot: server_addr=204.92.254.2
	nfs_boot: hostname=very.weird.com
	nfs_boot: gateway=204.92.254.6
	nfs_boot: my_mask=255.255.255.0
	root on most.weird.com:/
	nfs_mount_diskless: cannot find vnode for /etc: error 13
	no file system for le0
	cannot mount root, error = 79
	root device (default le0): 

I've tried using both the p_ucred from proc0 and a fresh and zeroed
struct cred from crget() but I get the same result from both.

So, what am I missing in setting up for my call to nfs_lookitup(), or is
it a bigger problem somewhere else?

(Note that at some point, once I get this working, I'll try to make a
failure to mount /etc non-fatal, so that if no /etc path is specified
the old way of just mounting a root filesystem will still work...)

Index: nfsdiskless.h
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/nfs/nfsdiskless.h,v
retrieving revision 1.16
diff -u -r1.16 nfsdiskless.h
--- nfsdiskless.h	21 Feb 1999 15:07:49 -0000	1.16
+++ nfsdiskless.h	5 Oct 2003 21:14:33 -0000
@@ -63,6 +63,7 @@
 	struct in_addr nd_gwip; /* My gateway */
 	/* Information for each mount point we need. */
 	struct nfs_dlmount nd_root; 	/* Mount info for root */
+	struct nfs_dlmount nd_etc; 	/* Mount info for /etc */
 };
 
 int nfs_boot_init __P((struct nfs_diskless *nd, struct proc *procp));
Index: nfs_vfsops.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/nfs/nfs_vfsops.c,v
retrieving revision 1.112.10.3
diff -u -r1.112.10.3 nfs_vfsops.c
--- nfs_vfsops.c	4 Oct 2003 08:34:30 -0000	1.112.10.3
+++ nfs_vfsops.c	6 Oct 2003 18:30:44 -0000
@@ -123,7 +123,8 @@
 extern u_int32_t nfs_prog, nfs_vers;
 
 static int nfs_mount_diskless __P((struct nfs_dlmount *, const char *,
-    struct mount **, struct vnode **, struct proc *));
+				   struct vnode **, struct mount **,
+				   struct proc *, struct mount *));
 
 #define TRUE	1
 #define	FALSE	0
@@ -285,17 +286,19 @@
 #endif
 
 /*
- * Mount a remote root fs via. NFS.  It goes like this:
+ * Mount a remote root fs and /etc via. NFS.  It goes like this:
  * - Call nfs_boot_init() to fill in the nfs_diskless struct
  * - build the rootfs mount point and call mountnfs() to do the rest.
+ * - build the etcfs mount point and call mountnfs() to do the rest.
  */
 int
 nfs_mountroot()
 {
 	struct nfs_diskless *nd;
 	struct vattr attr;
-	struct mount *mp;
-	struct vnode *vp;
+	struct vnode *vp;		/* vnode for just-mounted fs */
+	struct mount *rmp = NULL;	/* root's mountpoint */
+	struct mount *emp = NULL;	/* /etc's mountpoint */
 	struct proc *procp;
 	long n;
 	int error;
@@ -329,21 +332,15 @@
 	/*
 	 * Create the root mount point.
 	 */
-	error = nfs_mount_diskless(&nd->nd_root, "/", &mp, &vp, procp);
-	if (error)
-		goto out;
+	error = nfs_mount_diskless(&nd->nd_root, "/", &vp, &rmp, procp, (struct mount *) NULL);
+	if (error) {
+		nfs_boot_cleanup(nd, procp);
+		free(nd, M_NFSMNT);
+		return (error);
+	}
+	rootvp = vp;		/* remember this vnode! */
 	printf("root on %s\n", nd->nd_root.ndm_host);
 
-	/*
-	 * Link it into the mount list.
-	 */
-	simple_lock(&mountlist_slock);
-	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
-	simple_unlock(&mountlist_slock);
-	rootvp = vp;
-	mp->mnt_vnodecovered = NULLVP;
-	vfs_unbusy(mp);
-
 	/* Get root attributes (for the time). */
 	error = VOP_GETATTR(vp, &attr, procp->p_ucred, procp);
 	if (error)
@@ -354,37 +351,83 @@
 #endif
 	inittodr(n);
 
-out:
-	if (error)
+	/*
+	 * Create the /etc mount point.
+	 */
+	error = nfs_mount_diskless(&nd->nd_etc, "/etc", &vp, &emp, procp, rmp);
+	if (error) {
 		nfs_boot_cleanup(nd, procp);
+		free(nd, M_NFSMNT);
+		return (error);
+	}
+	printf("/etc on %s\n", nd->nd_etc.ndm_host);
+
+	/* Get /etc attributes (for fun). */
+	error = VOP_GETATTR(vp, &attr, procp->p_ucred, procp);
+	if (error)
+		panic("nfs_mountroot: getattr for /etc");
+
 	free(nd, M_NFSMNT);
-	return (error);
+	return 0;
 }
 
 /*
- * Internal version of mount system call for diskless setup.
- * Separate function because we used to call it twice.
- * (once for root and once for swap)
+ * Internal version of mount system call for diskless setup.  Separate function
+ * because we used to call it up to three times (once for root, and once for
+ * /etc), and because we don't need to do 1/10'th the error checking done by
+ * the real sys_mount().
  */
 static int
-nfs_mount_diskless(ndmntp, mntname, mpp, vpp, p)
+nfs_mount_diskless(ndmntp, mntname, vpp, mpp, p, dmp)
 	struct nfs_dlmount *ndmntp;
 	const char *mntname;	/* mount point name */
-	struct mount **mpp;
-	struct vnode **vpp;
+	struct vnode **vpp;	/* ptr to returnd vp structure */
+	struct mount **mpp;	/* ptr to returnd mp structure */
 	struct proc *p;
+	struct mount *dmp;	/* mount point of the root for non-"/" mounts */
 {
-	struct mount *mp;
+	struct mount *mp;	/* resulting mount point hooked into mountlist */
 	struct mbuf *m;
+	struct vnode *vcovp = NULL;	/* vnode for mntname for non-"/" mounts */
 	int error;
 
-	vfs_rootmountalloc(MOUNT_NFS, (char *)mntname, &mp);
+	if (strcmp(mntname, "/") == 0) {
+		error = vfs_rootmountalloc(MOUNT_NFS, "nfs_root_dev", &mp);
+	} else {
+		struct nfsnode *np = NULL;
+		struct vnode *dvp = NULL;
+
+		if ((error = nfs_root(dmp, &dvp))) {
+			printf("nfs_mount_diskless: cannot find vnode for mountpoint of %s: error %d\n",
+			       mntname, error);
+			return (error);
+		}
+		if ((error = nfs_lookitup(dvp, mntname, strlen(mntname), p->p_ucred, p, &np))) {
+			printf("nfs_mount_diskless: cannot find nfsnode for %s: error %d\n",
+			       mntname, error);
+			return (error);
+		}
+		vput(dvp);
+		vcovp = NFSTOV(np);
+		if (vcovp->v_mountedhere != NULL) {
+			vput(vcovp);
+			return (EBUSY);
+		}
+		error = vfs_mountalloc(MOUNT_NFS, "nfs_fs_dev", (char *) mntname, &mp, vcovp);
+	}
+	if (error) {
+		if (vcovp)
+			vput(vcovp);
+		printf("nfs_mount_diskless: cannot allocate mountpoint for %s: error %d\n",
+		       mntname, error);
+		return (error);
+	}
 
 	mp->mnt_op = &nfs_vfsops;
 
 	/*
-	 * Historical practice expects NFS root file systems to
-	 * be initially mounted r/w.
+	 * Historical practice expects NFS file systems mounted at boot to be
+	 * initially mounted r/w.
 	 */
 	mp->mnt_flag &= ~MNT_RDONLY;
 
@@ -400,13 +443,30 @@
 	if (error) {
 		mp->mnt_op->vfs_refcount--;
 		vfs_unbusy(mp);
-		printf("nfs_mountroot: mount %s failed: %d\n",
+		printf("nfs_mountroot: `mount %s' failed: %d\n",
 		       mntname, error);
 		free(mp, M_MOUNT);
-	} else
-		*mpp = mp;
+		if (vcovp)
+			vput(vcovp);
+		return (error);
+	}
+	/*
+	 * Link it into the mount list.
+	 */
+	simple_lock(&mountlist_slock);
+	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	simple_unlock(&mountlist_slock);
+	if (vcovp) {
+		vcovp->v_mountedhere = mp;
+		VOP_UNLOCK(vcovp, 0);
+	}
+	vfs_unbusy(mp);
+	(void) nfs_statfs(mp, &mp->mnt_stat, p);
+	if ((error = nfs_start(mp, 0, p)) != 0)
+		return (error);
+	*mpp = mp;
 
-	return (error);
+	return 0;
 }
 
 void
Index: nfs_bootparam.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/nfs/nfs_bootparam.c,v
retrieving revision 1.19
diff -u -r1.19 nfs_bootparam.c
--- nfs_bootparam.c	10 Nov 2001 10:59:09 -0000	1.19
+++ nfs_bootparam.c	5 Oct 2003 21:29:25 -0000
@@ -88,7 +88,12 @@
  * (2) Use RPC/bootparam/whoami to get our hostname,
  *     our IP address, and the server's IP address.
  * (3) Use RPC/bootparam/getfile to get the root path
+ * (3a)Use RPC/bootparam/getfile to get the /etc path
  * (4) Use RPC/mountd to get the root file handle
+ * (4a)Use RPC/mountd to get the /etc file handle
+ *
+ * XXX still could be implemented, but not really needed....
+ *
  * (5) Use RPC/bootparam/getfile to get the swap path
  * (6) Use RPC/mountd to get the swap file handle
  */
@@ -102,7 +107,7 @@
 
 /*
  * Get client name, gateway address, then
- * get root and swap server:pathname info.
+ * get root and /etc (and eventually swap) server:pathname info.
  * RPCs: bootparam/whoami, bootparam/getfile
  *
  * Use the old broadcast address for the WHOAMI
@@ -191,13 +196,18 @@
 
 	/*
 	 * Now fetch the server:pathname strings and server IP
-	 * for root and swap.  Missing swap is not fatal.
+	 * for root and etc.
 	 */
 	error = bp_getfile(sin, "root", &nd->nd_root);
 	if (error) {
 		printf("nfs_boot: bootparam get root: %d\n", error);
 		goto delout;
 	}
+	error = bp_getfile(sin, "etc", &nd->nd_etc);
+	if (error) {
+		printf("nfs_boot: bootparam get etc: %d\n", error);
+		goto delout;
+	}
 
 #ifndef NFS_BOOTPARAM_NOGATEWAY
 	gw_ndm = malloc(sizeof(*gw_ndm), M_NFSMNT, M_WAITOK);
@@ -420,7 +430,7 @@
 	if (m == NULL)
 		return (ENOMEM);
 
-	/* key name (root or swap) */
+	/* key name (root, etc, or swap) */
 	m->m_next = xdr_string_encode(key, strlen(key));
 	if (m->m_next == NULL)
 		return (ENOMEM);
Index: nfs_boot.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/nfs/nfs_boot.c,v
retrieving revision 1.57
diff -u -r1.57 nfs_boot.c
--- nfs_boot.c	10 Nov 2001 10:59:09 -0000	1.57
+++ nfs_boot.c	6 Oct 2003 00:45:52 -0000
@@ -122,7 +122,7 @@
 	}
 	nd->nd_ifp = ifp;
 
-	error = EADDRNOTAVAIL; /* ??? */
+	error = EPROTONOSUPPORT;
 #if defined(NFS_BOOT_BOOTP) || defined(NFS_BOOT_DHCP)
 	if (error && nfs_boot_rfc951) {
 #if defined(NFS_BOOT_DHCP)
@@ -153,7 +153,10 @@
 	 * Now fetch the NFS file handles as appropriate.
 	 */
 	error = nfs_boot_getfh(&nd->nd_root);
+	if (error)
+		nfs_boot_cleanup(nd, procp);
 
+	error = nfs_boot_getfh(&nd->nd_etc);
 	if (error)
 		nfs_boot_cleanup(nd, procp);
 
@@ -595,7 +598,7 @@
 	 */
 	pathname = strchr(ndm->ndm_host, ':');
 	if (pathname == 0) {
-		printf("nfs_boot: getfh - no pathname\n");
+		printf("nfs_boot: getfh - no pathname in %s\n", ndm->ndm_host);
 		return (EIO);
 	}
 	pathname++;
Index: vfs_subr.c
===================================================================
RCS file: /cvs/master/m-NetBSD/main/src/sys/kern/vfs_subr.c,v
retrieving revision 1.174.2.1
diff -u -r1.174.2.1 vfs_subr.c
--- vfs_subr.c	2 Jun 2002 15:29:56 -0000	1.174.2.1
+++ vfs_subr.c	6 Oct 2003 02:44:11 -0000
@@ -274,12 +274,13 @@
 	struct vfsops *vfsp = NULL;
 	struct mount *mp;
 
-	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
+	LIST_FOREACH(vfsp, &vfs_list, vfs_list)	/* XXX this is a gawd-awful-ugly abuse of cpp!!!! */
 		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
 			break;
 
 	if (vfsp == NULL)
 		return (ENODEV);
+
 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
 	memset((char *)mp, 0, (u_long)sizeof(struct mount));
 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
@@ -293,8 +294,53 @@
 	mp->mnt_stat.f_mntonname[0] = '/';
 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
 	*mpp = mp;
+
+	return (0);
+}
+
+/*
+ * Lookup a filesystem type, and if found allocate and initialize
+ * a mount structure for it.
+ *
+ * sys_mount() should use this, i.e.....
+ *
+ * XXX this code should be merged with vfs_rootmountalloc() and mntonname=="/"
+ * should be used to determine whether the root mount point is being allocated.
+ */
+int
+vfs_mountalloc(fstypename, devname, mntonname, mpp, vcovp)
+	char *fstypename;
+	char *devname;
+	char *mntonname;
+	struct mount **mpp;
+	struct vnode *vcovp;
+{
+	struct vfsops *vfsp = NULL;
+	struct mount *mp;		/* new mount point */
+
+	LIST_FOREACH(vfsp, &vfs_list, vfs_list)	/* XXX this is a gawd-awful-ugly abuse of cpp!!!! */
+		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
+			break;
+
+	if (vfsp == NULL)
+		return (ENODEV);
+
+	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+	memset((char *)mp, 0, (u_long)sizeof(struct mount));
+	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
+	(void)vfs_busy(mp, LK_NOWAIT, 0);
+	mp->mnt_op = vfsp;
+	mp->mnt_flag = MNT_RDONLY;
+	mp->mnt_vnodecovered = vcovp;
+	vfsp->vfs_refcount++;
+	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
+	(void) copystr(mntonname, mp->mnt_stat.f_mntonname, MNAMELEN - 1, 0);
+	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
+	*mpp = mp;
+
 	return (0);
 }
+
 
 /*
  * Lookup a mount point by filesystem identifier.

-- 
						Greg A. Woods

+1 416 218-0098                  VE3TCP            RoboHack <woods@robohack.ca>
Planix, Inc. <woods@planix.com>          Secrets of the Weird <woods@weird.com>