Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/netbsd-6]: src/sys/dev/raidframe Pull up following revision(s) (requested by oster in ticket #23)



details:   https://anonhg.NetBSD.org/src/rev/862e71d739b1
branches:  netbsd-6
changeset: 773798:862e71d739b1
user:      riz <riz%NetBSD.org@localhost>
date:      Thu Feb 23 02:22:05 2012 +0000

description:
Pull up following revision(s) (requested by oster in ticket #23):
        sys/dev/raidframe/rf_reconstruct.c: revision 1.118
        sys/dev/raidframe/rf_reconmap.c: revision 1.34
Comment out, and thereby effectively remove, a DIAGNOSTIC check that
is invalid for RAID5_RS.
Add logic to the main reconstruction loop to handle RAID5 with rotated
spares.  While here, observe that we were actually doing one more
stripe than we thought we were, and correct that too (it didn't matter
for non-RAID5_RS, but it definitely does for RAID5_RS).  Add some
bounds-checking at the beginning to handle the case where the number
of stripes in the set is smaller than the sliding reconstruction window.
XXX: this problem likely needs to be fixed for PARITY_DECLUSTERING too.

diffstat:

 sys/dev/raidframe/rf_reconmap.c    |  12 +++++-
 sys/dev/raidframe/rf_reconstruct.c |  66 +++++++++++++++++++++++++++++++++++--
 2 files changed, 71 insertions(+), 7 deletions(-)

diffs (156 lines):

diff -r fe87c68542fd -r 862e71d739b1 sys/dev/raidframe/rf_reconmap.c
--- a/sys/dev/raidframe/rf_reconmap.c   Thu Feb 23 02:18:54 2012 +0000
+++ b/sys/dev/raidframe/rf_reconmap.c   Thu Feb 23 02:22:05 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: rf_reconmap.c,v 1.33 2011/08/31 18:31:02 plunky Exp $  */
+/*     $NetBSD: rf_reconmap.c,v 1.33.8.1 2012/02/23 02:22:05 riz Exp $ */
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
@@ -34,7 +34,7 @@
  *************************************************************************/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_reconmap.c,v 1.33 2011/08/31 18:31:02 plunky Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_reconmap.c,v 1.33.8.1 2012/02/23 02:22:05 riz Exp $");
 
 #include "rf_raid.h"
 #include <sys/time.h>
@@ -157,7 +157,14 @@
 
                /* do we need to move the queue? */
                while (i > mapPtr->high_ru) {
+#if 0
 #ifdef DIAGNOSTIC
+                       /* XXX: The check below is not valid for
+                        * RAID5_RS.  It is valid for RAID 1 and RAID 5.
+                        * The issue is that we can easily have
+                        * RU_NOTHING entries here too, and those are
+                        * quite correct.
+                        */
                        if (mapPtr->status[mapPtr->head]!=RU_ALL) {
                                printf("\nraid%d: reconmap incorrect -- working on i %" PRIu64 "\n",
                                       raidPtr->raidid, i);
@@ -170,6 +177,7 @@
                                panic("reconmap incorrect");
                        } 
 #endif
+#endif
                        mapPtr->low_ru++;
                        mapPtr->high_ru++;
                        /* initialize "highest" RU status entry, which
diff -r fe87c68542fd -r 862e71d739b1 sys/dev/raidframe/rf_reconstruct.c
--- a/sys/dev/raidframe/rf_reconstruct.c        Thu Feb 23 02:18:54 2012 +0000
+++ b/sys/dev/raidframe/rf_reconstruct.c        Thu Feb 23 02:22:05 2012 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: rf_reconstruct.c,v 1.117 2011/10/14 09:23:30 hannken Exp $     */
+/*     $NetBSD: rf_reconstruct.c,v 1.117.8.1 2012/02/23 02:22:05 riz Exp $     */
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
@@ -33,7 +33,7 @@
  ************************************************************/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.117 2011/10/14 09:23:30 hannken Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.117.8.1 2012/02/23 02:22:05 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/time.h>
@@ -570,6 +570,9 @@
        RF_ReconCtrl_t *tmp_reconctrl;
        RF_ReconEvent_t *event;
        RF_StripeCount_t incPSID,lastPSID,num_writes,pending_writes,prev;
+#if RF_INCLUDE_RAID5_RS > 0
+       RF_StripeCount_t startPSID,endPSID,aPSID,bPSID,offPSID;
+#endif
        RF_ReconUnitCount_t RUsPerPU;
        struct timeval etime, elpsd;
        unsigned long xor_s, xor_resid_us;
@@ -622,7 +625,17 @@
        recon_error = 0;
        write_error = 0;
        pending_writes = incPSID;
-       raidPtr->reconControl->lastPSID = incPSID;
+       raidPtr->reconControl->lastPSID = incPSID - 1;
+
+       /* bounds check raidPtr->reconControl->lastPSID and
+          pending_writes so that we don't attempt to wait for more IO
+          than can possibly happen */
+
+       if (raidPtr->reconControl->lastPSID > lastPSID)
+               raidPtr->reconControl->lastPSID = lastPSID;
+
+       if (pending_writes > lastPSID)
+               pending_writes = lastPSID;
 
        /* start the actual reconstruction */
 
@@ -636,6 +649,49 @@
                }
 
                num_writes = 0;
+
+#if RF_INCLUDE_RAID5_RS > 0
+               /* For RAID5 with Rotated Spares we will be 'short'
+                  some number of writes since no writes will get
+                  issued for stripes where the spare is on the
+                  component being rebuilt.  Account for the shortage
+                  here so that we don't hang indefinitely below
+                  waiting for writes to complete that were never
+                  scheduled.
+
+                  XXX: Should be fixed for PARITY_DECLUSTERING and
+                  others too! 
+
+               */
+
+               if (raidPtr->Layout.numDataCol < 
+                   raidPtr->numCol - raidPtr->Layout.numParityCol) {
+                       /* numDataCol is at least 2 less than numCol, so
+                          should be RAID 5 with Rotated Spares */
+
+                       /* XXX need to update for RAID 6 */
+                       
+                       startPSID = raidPtr->reconControl->lastPSID - pending_writes + 1;
+                       endPSID = raidPtr->reconControl->lastPSID;
+                       
+                       offPSID = raidPtr->numCol - col - 1;
+                       
+                       aPSID = startPSID - startPSID % raidPtr->numCol + offPSID;
+                       if (aPSID < startPSID) {
+                               aPSID += raidPtr->numCol;
+                       }
+                       
+                       bPSID = endPSID - ((endPSID - offPSID) % raidPtr->numCol);
+                       
+                       if (aPSID < endPSID) {
+                               num_writes = ((bPSID - aPSID) / raidPtr->numCol) + 1;
+                       }
+                       
+                       if ((aPSID == endPSID) && (bPSID == endPSID)) {
+                               num_writes++;
+                       }
+               }
+#endif
                
                /* issue a read for each surviving disk */
                
@@ -714,7 +770,7 @@
 #endif
                }
 
-               /* reads done, wakup any waiters, and then wait for writes */
+               /* reads done, wakeup any waiters, and then wait for writes */
 
                rf_WakeupHeadSepCBWaiters(raidPtr);
 
@@ -1134,7 +1190,7 @@
                        ctrl->ru_count = 0;
                        /* code left over from when head-sep was based on
                         * parity stripe id */
-                       if (ctrl->curPSID >= raidPtr->reconControl->lastPSID) {
+                       if (ctrl->curPSID > raidPtr->reconControl->lastPSID) {
                                CheckForNewMinHeadSep(raidPtr, ++(ctrl->headSepCounter));
                                return (RF_RECON_DONE_READS);   /* finito! */
                        }



Home | Main Index | Thread Index | Old Index