NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

kern/39784: enhancement to raidframe autoconfig for layered raid devices



>Number:         39784
>Category:       kern
>Synopsis:       enhancement to raidframe autoconfig for layered raid devices
>Confidential:   no
>Severity:       non-critical
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          change-request
>Submitter-Id:   net
>Arrival-Date:   Wed Oct 22 10:30:00 +0000 2008
>Originator:     Wolfgang Stukenbrock
>Release:        NetBSD 4.0
>Organization:
Dr. Nagler & Company GmbH
>Environment:
        
        
System: NetBSD s012 4.0 NetBSD 4.0 (NSW-S012) #1: Thu Sep 11 12:21:03 CEST 2008 
root@s012:/usr/src/sys/arch/amd64/compile/NSW-S012 amd64
Architecture: x86_64
Machine: amd64
>Description:
        The current raidframe autoconfig code can only set up raid devices
        whose components are located on physical devices that are present
        at boot time.
        As the manuals state, it is possible (and useful) to layer raid
        devices in some cases.
        Most useful are raid0 devices constructed from raid1 devices in order
        to get beyond the disk size limits ...
        These additional devices are not found by the autoconfig code, because
        it does not look at the raid devices it has just configured to find
        additional raid components. Such devices may be configured by storing
        their configuration in a setup file (e.g. /etc/raid5.conf) and letting
        the raidframe startup script do the setup very early while booting the
        system.
        When doing this you will lose a very useful feature of the raidframe
        autoconfig code: abstraction from the disk names.
        This abstraction is very useful if someone adds a new disk or a disk
        fails, because the remaining disks may get renumbered, and then any
        kind of configuration from a file cannot succeed.
        The following patch to rf_netbsdkintf.c adds the ability to set up
        layered raid devices via raidframe autoconfig.
        It has been tested with a raid 0 based on multiple raid 1 devices and
        no further problems have been found - up to now, of course.
        Please see the large comment in the patch below for a restriction of
        this patch!
        Perhaps a note should be added to the manual if it gets integrated
        into the sources.

        remark: the fix is based on NetBSD 4.0 - not 4.0.1 or current. I've had
                a look at the current version and noticed changes there that
                will require a lot of work when integrating this patch.
                (And I'm using 4.0 at the moment ...)
>How-To-Repeat:
        not relevant - enhancement request ...
>Fix:
--- rf_netbsdkintf.c    2008/10/22 09:48:26     1.1
+++ rf_netbsdkintf.c    2008/10/22 10:10:51
@@ -213,6 +213,9 @@
     void *, int, struct proc *);
 static void raidinit(RF_Raid_t *);
 
+static RF_AutoConfig_t *rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, 
struct vnode *vp,
+    const char *cname, RF_SectorCount_t size);
+
 void raidattach(int);
 static int raid_match(struct device *, struct cfdata *, void *);
 static void raid_attach(struct device *, struct device *, void *);
@@ -309,7 +312,7 @@
 void rf_buildroothack(RF_ConfigSet_t *);
 
 RF_AutoConfig_t *rf_find_raid_components(void);
-RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
+RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *, RF_ConfigSet_t *);
 static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
 static int rf_reasonable_label(RF_ComponentLabel_t *);
 void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
@@ -428,7 +431,7 @@
        ac_list = rf_find_raid_components();
 
        /* 2. Sort them into their respective sets. */
-       config_sets = rf_create_auto_sets(ac_list);
+       config_sets = rf_create_auto_sets(ac_list, NULL);
 
        /*
         * 3. Evaluate each set andconfigure the valid ones.
@@ -443,11 +446,102 @@
        return 1;
 }
 
+static int rf_buildroot_sub(RF_ConfigSet_t *cset, int *r_id, int level)
+{
+       int retcode;
+
+       if (rf_have_enough_components(cset) >= level &&
+           cset->ac->clabel->autoconfigure == 1) {
+               retcode = rf_auto_config_set(cset, r_id);
+               if (!retcode) {
+#ifdef DEBUG
+                       printf("raid%d: configured ok\n", *r_id);
+#endif
+                       /* cleanup */
+                       if (cset->rootable)
+                               retcode = 1;
+                       rf_cleanup_config_set(cset);
+                       return retcode;
+               }
+#ifdef DEBUG
+               printf("Autoconfig failed with code %d for raid%d\n", retcode, 
*r_id);
+#endif
+               return -2;
+       }
+  return -1;
+}
+
+static RF_AutoConfig_t *
+rf_buildroot_search(int raidID)
+{
+       int bmajor, error, i;
+       dev_t bdev;
+       struct vnode *vp;
+       struct disklabel label;
+       RF_AutoConfig_t *ac_list = NULL;
+
+       bmajor = devsw_name2blk("raid", NULL, 0);
+       bdev = MAKEDISKDEV(bmajor, raidID, RAW_PART);
+       if (bdevvp(bdev, &vp))
+               panic("RAID can't alloc vnode in rf_buildroot_search()");
+
+       error = VOP_OPEN(vp, FREAD, NOCRED, 0);
+       if (error) {
+               printf("RAID-autoconfig: failed to open just configured RAID 
device raid%d (%d) - ignored\n", raidID, error);
+               vput(vp);
+               return NULL;
+       }
+       /* Ok, the disk exists.  Go get the disklabel. */
+       error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
+       if (error) {
+               /*
+                * XXX can't happen - open() would
+                * have errored out (or faked up one)
+                */
+               if (error != ENOTTY)
+                       printf("RAIDframe: can't get label for dev "
+                           "raid%d (%d)\n", raidID, error);
+       }
+
+       /* don't need this any more.  We'll allocate it again
+          a little later if we really do... */
+       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+       VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
+       vput(vp);
+
+       if (error)
+               return NULL;
+
+       for (i = 0; i < label.d_npartitions; i++) {
+               char cname[sizeof(ac_list->devname)];
+
+               /* We only support partitions marked as RAID */
+               if (label.d_partitions[i].p_fstype != FS_RAID)
+                       continue;
+
+               bdev = MAKEDISKDEV(bmajor, raidID, i);
+               if (bdevvp(bdev, &vp))
+                       panic("RAID can't alloc vnode");
+
+               error = VOP_OPEN(vp, FREAD, NOCRED, 0);
+               if (error) {
+                       /* Whatever... */
+                       vput(vp);
+                       continue;
+               }
+               snprintf(cname, sizeof(cname), "raid%d%c", raidID, 'a' + i);
+               ac_list = rf_get_component(ac_list, bdev, vp, cname, 
label.d_partitions[i].p_size);
+       }
+  return ac_list;
+}
+
 void
 rf_buildroothack(RF_ConfigSet_t *config_sets)
 {
        RF_ConfigSet_t *cset;
        RF_ConfigSet_t *next_cset;
+       RF_ConfigSet_t *prev_cset;
+       RF_AutoConfig_t *ac_list;
        int retcode;
        int raidID;
        int rootID;
@@ -455,37 +549,102 @@
 
        rootID = 0;
        num_root = 0;
+config_again:          /* first step: configure only "complete" devices */
        cset = config_sets;
+       prev_cset = NULL;
        while(cset != NULL ) {
                next_cset = cset->next;
-               if (rf_have_enough_components(cset) &&
-                   cset->ac->clabel->autoconfigure==1) {
-                       retcode = rf_auto_config_set(cset,&raidID);
-                       if (!retcode) {
-#ifdef DEBUG
-                               printf("raid%d: configured ok\n", raidID);
-#endif
-                               if (cset->rootable) {
-                                       rootID = raidID;
-                                       num_root++;
-                               }
-                       } else {
-                               /* The autoconfig didn't work :( */
-#ifdef DEBUG
-                               printf("Autoconfig failed with code %d for 
raid%d\n", retcode, raidID);
-#endif
-                               rf_release_all_vps(cset);
+               retcode = rf_buildroot_sub(cset, &raidID, 2); /* only complete 
raid devices ... */
+               if (retcode >= 0) {
+                       if (retcode >= 1) {
+                               rootID = raidID;
+                               num_root++;
+                       }
+                       if (prev_cset == NULL) /* this set has been configured 
and the config data is gone - remove it from the list ... */
+                               config_sets = next_cset;
+                       else
+                               prev_cset->next = next_cset;
+                       if ((ac_list = rf_buildroot_search(raidID)) != NULL) {
+               add_new_components:
+       /*
+        * XXXXXX
+        *
+        * We may have a problem with our new config list ...
+        *
+        * We may not add any component to the remaining sets if we have 
already configured that one.
+        * Otherwise we will add a new set information for the raid device 
already configured by this routine.
+        * This can only happen, if some components are layered raid devices 
and some not - see below too.
+        * 
+        * In worst case, this may lead to a mirror splitted into two degraded 
devices.
+        *
+        * This problem can only occure if there are failed (missing) 
components, because if all components
+        * are available, the first loop will handle everything and there this 
problem is only be relevant for spare devices.
+        * (in some setups hot-spare devices may get dropped from a raid during 
autoconfig if they come from a higher layer-level ...)
+        *
+        * On the other hand, we are talking only about layered raid devices 
here!
+        * And the described problem can only occure with such kind of 
components - components that reside on a raid device.
+        * The only realy usefull setup here (from my oppinion) would be a 
raid0 (stripe) device that is constructed from some
+        * raid 1 devices (and perhaps 4 or 5 - possible but less usefull) in 
order to increase the total size of the raid device.
+        * (remark: concatenation is not avaliable in raidframe ...)
+        * And a raid 0 device has no spare disks at all.
+        *
+        * If it is intended to catch all (strange) setup-cases, we need an 
addtional list of already configured devices and
+        * drop additional components for those devices from the list. Dropping 
should be don in a verbose way, because we
+        * are gooing to change the raid configuration during boot here!
+        * For now this list is not implemented.
+        *
+        * In order to avoid configuration of layered raid devices prior all 
"lower-level" raid devices have been configured, we
+        * will add new devices to the end of the list of all sets in 
rf_create_auto_sets() - see change below.
+        * So we have a stable and reliable autoconfig here, if all components 
of a raid device are at the same layer-level.
+        * And we can build up as much levels we ever want to and autoconfigure 
them during startup.
+        *
+        * W. Stukenbrock
+        */
+                               config_sets = rf_create_auto_sets(ac_list, 
config_sets);
+                               goto config_again;
                        }
-               } else {
-#ifdef DEBUG
-                       printf("raid%d: not enough components\n", raidID);
-#endif
-                       /* we're not autoconfiguring this set...
-                          release the associated resources */
-                       rf_release_all_vps(cset);
+               } else { /* ignore any kind of error here - we retry later ... 
*/
+                       prev_cset = cset; /* not complete - just try next one 
in this loop ... */
+               }
+               cset = next_cset;
+       }
+                       /* second step, try to configure the rest - on failure, 
give them an additional try later */
+                       /* perhaps some additional components are found and we 
will succeed later ... */
+       cset = config_sets;
+       prev_cset = NULL;
+       while(cset != NULL ) {
+               next_cset = cset->next;
+               retcode = rf_buildroot_sub(cset, &raidID, 1); /* all raid 
devices that may have a chance ... */
+               if (retcode >= 0) {
+                       if (retcode >= 1) {
+                               rootID = raidID;
+                               num_root++;
+                       }
+                       if (prev_cset == NULL) /* this set has been configured 
and the config data is gone - remove it from the list ... */
+                               config_sets = next_cset;
+                       else
+                               prev_cset->next = next_cset;
+                       if ((ac_list = rf_buildroot_search(raidID)) != NULL) 
goto add_new_components;
+               } else { /* ignore any kind of error here - we retry later ... 
*/
+                       prev_cset = cset; /* not complete - just try next one 
in this loop ... */
+               }
+               cset = next_cset;
+       }
+                       /* third step, try to configure the rest - if still not 
possible, drop it */
+                       /* it makes no sense to search for  additonal 
components anymore - has already been done in the second step */
+       cset = config_sets;
+       while(cset != NULL ) {
+               next_cset = cset->next;
+               retcode = rf_buildroot_sub(cset, &raidID, 1); /* all raid 
devices that may have a chance ... */
+               if (retcode >= 0) {
+                       if (retcode >= 1) {
+                               rootID = raidID;
+                               num_root++;
+                       }
+               } else { /* sorry - faild to configure ... */
+                       rf_release_all_vps(cset); /* release everything - 
configuration not required or failed ... */
+                       rf_cleanup_config_set(cset);
                }
-               /* cleanup */
-               rf_cleanup_config_set(cset);
                cset = next_cset;
        }
 
@@ -2993,7 +3152,7 @@
 #endif
 
 RF_ConfigSet_t *
-rf_create_auto_sets(RF_AutoConfig_t *ac_list)
+rf_create_auto_sets(RF_AutoConfig_t *ac_list, RF_ConfigSet_t *start_cfg)
 {
        RF_AutoConfig_t *ac;
        RF_ConfigSet_t *config_sets;
@@ -3001,7 +3160,7 @@
        RF_AutoConfig_t *ac_next;
 
 
-       config_sets = NULL;
+       config_sets = start_cfg;
 
        /* Go through the AutoConfig list, and figure out which components
           belong to what sets.  */
@@ -3046,9 +3205,17 @@
                                }
                                cset->ac = ac;
                                ac->next = NULL;
-                               cset->next = config_sets;
                                cset->rootable = 0;
-                               config_sets = cset;
+                               if (start_cfg == NULL) { /* "initial setup" 
case - order does not mather ... */
+                                       cset->next = config_sets;
+                                       config_sets = cset;
+                               } else { /* append new sets to end of list - 
needed for layered devices that are added here ... */
+                                       RF_ConfigSet_t *cs;
+
+                                       cset->next = NULL;
+                                       for (cs = config_sets; cs->next != 
NULL; cs = cs->next);
+                                       cs->next = cset;
+                               }
                        }
                }
                ac = ac_next;
@@ -3215,7 +3382,8 @@
        }
        /* otherwise, all is well, and we've got enough to take a kick
           at autoconfiguring this set */
-       return(1);
+       if (num_missing > 0) return (1);
+       return(2);
 }
 
 void

>Unformatted:
        
        


Home | Main Index | Thread Index | Old Index