Subject: root configuration with multiple root-enabled RAID sets
To: None <tech-kern@netbsd.org>
From: Emmanuel Dreyfus <manu@netbsd.org>
List: tech-kern
Date: 10/11/2006 20:43:02
--KsGdsel6WgEHnImy
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

The situation: I have a machine with multiple RAID sets, several sets are
configured as root capables.

That causes rf_buildroothack() to set RB_ASKNAME so that the admin can 
decide which root should be used.

IMO this is a wrong behavior, as this will cause setroot() to ignore
a root specification hardcoded in the kernel config file: if I build
a kernel with "root at raid0a type ffs dumps on raid0b", it will
still ask me for the root.

The obious fix is that rf_buildroothack() should not set RB_ASKNAME
and let setroot() use the root specification if it exists, and ask the
user if it does not. 

That has one drawback. If you have two RAID 1
  raid0a -> wd0a wd1a, root capable
  raid1a -> wd2a wd3a, root capable
And no root specification.

Then rf_buildroothack() would leave the boot device as NULL. setroot()
will make an attempt. It will discover e.g.: wd0a. So here we get an 
undesired behavior, because we would have prefered to be asked rather
than booting on wd0a.

The attached patch changes rf_buildroothack() so that it gets smarter: if
there are multiple root RAID sets, then rf_buildroothack() will call 
cpu_rootconf() so that MD code can tell us what is the boot device. Next, 
we will try to find the RAID set that includes the boot device, and if
we find one, we use it.

Is it okay to commit that?

-- 
Emmanuel Dreyfus
manu@netbsd.org

--KsGdsel6WgEHnImy
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="rf_mroot.diff"

Index: rf_netbsdkintf.c
===================================================================
RCS file: /cvsroot/src/sys/dev/raidframe/rf_netbsdkintf.c,v
retrieving revision 1.214
diff -U4 -r1.214 rf_netbsdkintf.c
--- rf_netbsdkintf.c	28 Sep 2006 02:39:50 -0000	1.214
+++ rf_netbsdkintf.c	11 Oct 2006 16:15:37 -0000
@@ -1,4 +1,5 @@
+#define DEBUG
 /*	$NetBSD: rf_netbsdkintf.c,v 1.214 2006/09/28 02:39:50 oster Exp $	*/
 /*-
  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
  * All rights reserved.
@@ -482,10 +483,11 @@
 					num_root++;
 				}
 			} else {
 				/* The autoconfig didn't work :( */
-#if DEBUG
-				printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
+#ifdef DEBUG
+				printf("Autoconfig failed with code %d "
+				    "for raid%d\n", retcode, raidID);
 #endif
 				rf_release_all_vps(cset);
 			}
 		} else {
@@ -500,16 +502,63 @@
 		rf_cleanup_config_set(cset);
 		cset = next_cset;
 	}
 
+
 	/* we found something bootable... */
+	if (num_root == 1) {
+		booted_device = &raidrootdev[rootID];
+		return;
+	}
+#ifdef DEBUG
+	printf("Found %d root RAID sets\n", num_root);
+#endif
+
+	/* 
+	 * Maybe the MD code can help. If it cannot, then setroot() will
+	 * discover that we have no booted_device and will ask the user
+	 * if nothing was hardwired in the kernel config file
+	 */
+	if (booted_device == NULL)
+		cpu_rootconf();
+	if (booted_device == NULL) 
+		return;
+
+	num_root = 0;
+	for (raidID = 0; raidID < numraid; raidID++) {
+		int col;
+
+		if (raidPtrs[raidID]->valid == 0)
+			continue;
+
+		if (raidPtrs[raidID]->root_partition == 0)
+			continue;
+
+		for (col = 0; col < raidPtrs[raidID]->numCol; col++) {
+			char *devname;
+
+			devname = raidPtrs[raidID]->Disks[col].devname;
+			devname += sizeof("/dev/") - 1;
+			if (strncmp(devname, booted_device->dv_xname, 
+			    strlen(booted_device->dv_xname)) != 0)
+				continue;
+#ifdef DEBUG
+			printf("raid%d includes boot device %s\n",
+			    raidID, devname);
+#endif
+			num_root++;
+			rootID = raidID;
+		}
+	}
 
 	if (num_root == 1) {
 		booted_device = &raidrootdev[rootID];
-	} else if (num_root > 1) {
-		/* we can't guess.. require the user to answer... */
-		boothowto |= RB_ASKNAME;
+		return;
 	}
+
+	printf("Found %d possible root RAID sets\n", num_root);
+
+	return;
 }
 
 
 int
@@ -1074,9 +1123,9 @@
 		   trying to patch things.
 		   */
 
 		raidid = raidPtr->raidid;
-#if DEBUG
+#ifdef DEBUG
 		printf("raid%d: Got component label:\n", raidid);
 		printf("raid%d: Version: %d\n", raidid, clabel->version);
 		printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
 		printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
@@ -2616,9 +2665,9 @@
 	if (!raidread_component_label(dev, vp, clabel)) {
 		    /* Got the label.  Does it look reasonable? */
 		    if (rf_reasonable_label(clabel) && 
 			(clabel->partitionSize <= size)) {
-#if DEBUG
+#ifdef DEBUG
 			    printf("Component on: %s: %llu\n",
 				cname, (unsigned long long)size);
 			    rf_print_component_label(clabel);
 #endif
@@ -2806,9 +2855,9 @@
 	return(0);
 }
 
 
-#if DEBUG
+#ifdef DEBUG
 void
 rf_print_component_label(RF_ComponentLabel_t *clabel)
 {
 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
@@ -3007,9 +3056,9 @@
 		while(ac!=NULL) {
 			if ((ac->clabel->column == c) &&
 			    (ac->clabel->mod_counter == mod_counter)) {
 				/* it's this one... */
-#if DEBUG
+#ifdef DEBUG
 				printf("Found: %s at %d\n",
 				       ac->devname,c);
 #endif
 				break;
@@ -3236,9 +3285,9 @@
 	RF_Config_t *config;
 	int raidID;
 	int retcode;
 
-#if DEBUG
+#ifdef DEBUG
 	printf("RAID autoconfigure\n");
 #endif
 
 	retcode = 0;
@@ -3291,9 +3340,9 @@
 		free(config, M_RAIDFRAME);
 		return(1);
 	}
 
-#if DEBUG
+#ifdef DEBUG
 	printf("Configuring raid%d:\n",raidID);
 #endif
 
 	raidPtr = raidPtrs[raidID];

--KsGdsel6WgEHnImy--