Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src Finally commit the RAIDframe parity map Summer Of Code project.



details:   https://anonhg.NetBSD.org/src/rev/c3151a70b623
branches:  trunk
changeset: 749093:c3151a70b623
user:      jld <jld%NetBSD.org@localhost>
date:      Tue Nov 17 18:54:26 2009 +0000

description:
Finally commit the RAIDframe parity map Summer Of Code project.

Drastically reduces the amount of time spent rewriting parity after an
unclean shutdown by keeping better track of which regions might have had
outstanding writes.  Enabled by default; can be disabled on a per-set
basis, or tuned, with the new raidctl(8) commands.

Discussed on tech-kern@ to a general air of approval; exhortations to
commit from mrg@, christos@, and others.

Thanks to Google for their sponsorship, oster@ for mentoring the
project, assorted developers for trying very hard to break it, and
probably more I'm forgetting.

diffstat:

 sbin/raidctl/raidctl.8             |   50 ++-
 sbin/raidctl/raidctl.c             |  139 ++++++-
 sys/dev/raidframe/files.raidframe  |    3 +-
 sys/dev/raidframe/raidframeio.h    |    7 +-
 sys/dev/raidframe/raidframevar.h   |   40 +-
 sys/dev/raidframe/rf_copyback.c    |   26 +-
 sys/dev/raidframe/rf_disks.c       |   19 +-
 sys/dev/raidframe/rf_driver.c      |   18 +-
 sys/dev/raidframe/rf_engine.c      |   12 +-
 sys/dev/raidframe/rf_kintf.h       |   16 +-
 sys/dev/raidframe/rf_netbsdkintf.c |  393 +++++++++++-------
 sys/dev/raidframe/rf_paritymap.c   |  750 +++++++++++++++++++++++++++++++++++++
 sys/dev/raidframe/rf_paritymap.h   |  125 ++++++
 sys/dev/raidframe/rf_parityscan.c  |   21 +-
 sys/dev/raidframe/rf_parityscan.h  |    3 +-
 sys/dev/raidframe/rf_raid.h        |    5 +-
 sys/dev/raidframe/rf_reconstruct.c |   46 +-
 sys/dev/raidframe/rf_states.c      |   14 +-
 18 files changed, 1450 insertions(+), 237 deletions(-)

diffs (truncated from 2463 to 300 lines):

diff -r f8a0fdb19ca5 -r c3151a70b623 sbin/raidctl/raidctl.8
--- a/sbin/raidctl/raidctl.8    Tue Nov 17 18:44:33 2009 +0000
+++ b/sbin/raidctl/raidctl.8    Tue Nov 17 18:54:26 2009 +0000
@@ -1,4 +1,4 @@
-.\"     $NetBSD: raidctl.8,v 1.56 2008/08/28 21:24:30 wiz Exp $
+.\"     $NetBSD: raidctl.8,v 1.57 2009/11/17 18:54:26 jld Exp $
 .\"
 .\" Copyright (c) 1998, 2002 The NetBSD Foundation, Inc.
 .\" All rights reserved.
@@ -96,6 +96,16 @@
 .Fl I Ar serial_number Ar dev
 .Nm
 .Op Fl v
+.Fl m Ar dev
+.Nm
+.Op Fl v
+.Fl M
+.Oo yes | no | set
+.Ar params
+.Oc
+.Ar dev
+.Nm
+.Op Fl v
 .Fl p Ar dev
 .Nm
 .Op Fl v
@@ -222,6 +232,44 @@
 This step
 .Em MUST
 be performed when a new RAID set is created.
+.It Fl m Ar dev
+Display status information about the parity map on the RAID set, if any.
+If used with
+.Fl v
+then the current contents of the parity map will be output (in
+hexadecimal format) as well.
+.It Fl M Ic yes Ar dev
+.\"XXX should there be a section with more info on the parity map feature?
+Enable the use of a parity map on the RAID set; this is the default,
+and greatly reduces the time taken to check parity after unclean
+shutdowns at the cost of some very slight overhead during normal
+operation.
+Changes to this setting will take effect the next time the set is
+configured.
+Note that RAID-0 sets, having no parity, will not use a parity map in
+any case.
+.It Fl M Ic no Ar dev
+Disable the use of a parity map on the RAID set; doing this is not
+recommended.
+This will take effect the next time the set is configured.
+.It Fl M Ic set Ar cooldown Ar tickms Ar regions Ar dev
+Alter the parameters of the parity map; parameters to leave unchanged
+can be given as 0, and trailing zeroes may be omitted.
+.\"XXX should this explanation be deferred to another section as well?
+The RAID set is divided into 
+.Ar regions
+regions; each region is marked dirty for at most
+.Ar cooldown
+intervals of 
+.Ar tickms
+milliseconds each after a write to it, and at least
+.Ar cooldown
+\- 1 such intervals.
+Changes to
+.Ar regions
+take effect the next time the set is configured, while changes to the other
+parameters are applied immediately.
+The default parameters are expected to be reasonable for most workloads.
 .It Fl p Ar dev
 Check the status of the parity on the RAID set.
 Displays a status message,
diff -r f8a0fdb19ca5 -r c3151a70b623 sbin/raidctl/raidctl.c
--- a/sbin/raidctl/raidctl.c    Tue Nov 17 18:44:33 2009 +0000
+++ b/sbin/raidctl/raidctl.c    Tue Nov 17 18:54:26 2009 +0000
@@ -1,4 +1,4 @@
-/*      $NetBSD: raidctl.c,v 1.41 2009/10/11 12:14:05 pooka Exp $   */
+/*      $NetBSD: raidctl.c,v 1.42 2009/11/17 18:54:26 jld Exp $   */
 
 /*-
  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
@@ -39,7 +39,7 @@
 #include <sys/cdefs.h>
 
 #ifndef lint
-__RCSID("$NetBSD: raidctl.c,v 1.41 2009/10/11 12:14:05 pooka Exp $");
+__RCSID("$NetBSD: raidctl.c,v 1.42 2009/11/17 18:54:26 jld Exp $");
 #endif
 
 
@@ -85,13 +85,15 @@
 static  void do_meter(int, u_long);
 static  void get_bar(char *, double, int);
 static  void get_time_string(char *, int);
+static  void rf_output_pmstat(int, int);
+static  void rf_pm_configure(int, int, char *, int[]);
 
 int verbose;
 
 int
 main(int argc,char *argv[])
 {
-       int ch;
+       int ch, i;
        int num_options;
        unsigned long action;
        char config_filename[PATH_MAX];
@@ -99,6 +101,8 @@
        char name[PATH_MAX];
        char component[PATH_MAX];
        char autoconf[10];
+       char *parityconf = NULL;
+       int parityparams[3];
        int do_output;
        int do_recon;
        int do_rewrite;
@@ -124,7 +128,7 @@
        rump_init();
 #endif
 
-       while ((ch = getopt(argc, argv, "a:A:Bc:C:f:F:g:GiI:l:r:R:sSpPuv")) 
+       while ((ch = getopt(argc, argv, "a:A:Bc:C:f:F:g:GiI:l:mM:r:R:sSpPuv")) 
               != -1)
                switch(ch) {
                case 'a':
@@ -188,6 +192,23 @@
                        serial_number = atoi(optarg);
                        num_options++;
                        break;
+               case 'm':
+                       action = RAIDFRAME_PARITYMAP_STATUS;
+                       openmode = O_RDONLY;
+                       num_options++;
+                       break;
+               case 'M':
+                       action = RAIDFRAME_PARITYMAP_SET_DISABLE;
+                       parityconf = strdup(optarg);
+                       num_options++;
+                       /* XXXjld: should rf_pm_configure do the atoi()s? */
+                       i = 0;
+                       while (i < 3 && optind < argc &&
+                           isdigit((int)argv[optind][0]))
+                               parityparams[i++] = atoi(argv[optind++]);
+                       while (i < 3)
+                               parityparams[i++] = 0;
+                       break;
                case 'l': 
                        action = RAIDFRAME_SET_COMPONENT_LABEL;
                        strlcpy(component, optarg, sizeof(component));
@@ -320,6 +341,12 @@
                else
                        rf_get_device_status(fd);
                break;
+       case RAIDFRAME_PARITYMAP_STATUS:
+               rf_output_pmstat(fd, raidID);
+               break;
+       case RAIDFRAME_PARITYMAP_SET_DISABLE:
+               rf_pm_configure(fd, raidID, parityconf, parityparams);
+               break;
        case RAIDFRAME_REBUILD_IN_PLACE:
                rebuild_in_place(fd, component);
                break;
@@ -467,6 +494,105 @@
 }
 
 static void
+rf_output_pmstat(int fd, int raidID)
+{
+       char srs[7];
+       int i, j, dr;
+       int dis;
+       struct rf_pmstat st;
+
+       do_ioctl(fd, RAIDFRAME_PARITYMAP_STATUS, &st,
+           "RAIDFRAME_PARITYMAP_STATUS");
+       if (st.enabled) {
+               if (0 > humanize_number(srs, 7, st.region_size * DEV_BSIZE, 
+                       "B", HN_AUTOSCALE, HN_NOSPACE))
+                       strlcpy(srs, "???", 7);
+
+               printf("raid%d: parity map enabled with %u regions of %s\n",
+                   raidID, st.params.regions, srs);
+               printf("raid%d: parity cleaned after %d intervals of"
+                   " %d.%03ds\n", raidID, st.params.cooldown,
+                   st.params.tickms / 1000, st.params.tickms % 1000);
+               printf("raid%d: write/sync/clean counters "
+                   "%"PRIu64"/%"PRIu64"/%"PRIu64"\n", raidID,
+                   st.ctrs.nwrite, st.ctrs.ncachesync, st.ctrs.nclearing);
+
+               dr = 0;
+               for (i = 0; i < RF_PARITYMAP_NREG; i++)
+                       if (isset(st.dirty, i))
+                               dr++;
+               printf("raid%d: %d dirty region%s\n", raidID, dr,
+                   dr == 1 ? "" : "s");
+
+               if (verbose > 0) {
+                       for (i = 0; i < RF_PARITYMAP_NBYTE; i += 32) {
+                               printf("    ");
+                               for (j = i; j < RF_PARITYMAP_NBYTE
+                                        && j < i + 32; j++)
+                                       printf("%x%x", st.dirty[j] & 15, 
+                                           (st.dirty[j] >> 4) & 15);
+                               printf("\n");
+                       }
+               }
+       } else {
+               printf("raid%d: parity map disabled\n", raidID);
+       }
+
+       do_ioctl(fd, RAIDFRAME_PARITYMAP_GET_DISABLE, &dis,
+           "RAIDFRAME_PARITYMAP_GET_DISABLE");
+       printf("raid%d: parity map will %s %sabled on next configure\n", 
+           raidID, dis == st.enabled ? "be" : "remain", dis ? "dis" : "en");
+}
+
+static void
+rf_pm_configure(int fd, int raidID, char *parityconf, int parityparams[])
+{
+       int dis;
+       struct rf_pmparams params;
+
+       if (strcasecmp(parityconf, "yes") == 0)
+               dis = 0;
+       else if (strcasecmp(parityconf, "no") == 0)
+               dis = 1;
+       else if (strcasecmp(parityconf, "set") == 0) {
+               params.cooldown = parityparams[0];
+               params.tickms = parityparams[1];
+               params.regions = parityparams[2];
+               
+               do_ioctl(fd, RAIDFRAME_PARITYMAP_SET_PARAMS, &params,
+                   "RAIDFRAME_PARITYMAP_SET_PARAMS");
+
+               if (params.cooldown != 0 || params.tickms != 0) {
+                       printf("raid%d: parity cleaned after", raidID);
+                       if (params.cooldown != 0)
+                               printf(" %d", params.cooldown);
+                       printf(" intervals");
+                       if (params.tickms != 0) {
+                               printf(" of %d.%03ds", params.tickms / 1000,
+                                   params.tickms % 1000);
+                       }
+                       printf("\n");
+               }
+               if (params.regions != 0)
+                       printf("raid%d: will use %d regions on next"
+                           " configuration\n", raidID, params.regions);
+
+               return;
+               /* XXX the control flow here could be prettier. */
+       } else {
+               fprintf(stderr, "%s: \"%s\" is not a valid parity map command"
+                   "\n", getprogname(), parityconf);
+               exit(1);
+       }
+
+       do_ioctl(fd, RAIDFRAME_PARITYMAP_SET_DISABLE, &dis,
+           "RAIDFRAME_PARITYMAP_SET_DISABLE");
+       printf("raid%d: parity map will be %sabled on next configure\n", 
+           raidID, dis ? "dis" : "en");
+}
+
+
+static void
 rf_output_configuration(int fd, const char *name)
 {
        RF_DeviceConfig_t device_config;
@@ -1034,7 +1160,7 @@
        const char *progname = getprogname();
 
        fprintf(stderr, "usage: %s [-v] -a component dev\n", progname);
-       fprintf(stderr, "       %s [-v] -A yes | no | root dev\n", progname);
+       fprintf(stderr, "       %s [-v] -A [yes | no | root] dev\n", progname);
        fprintf(stderr, "       %s [-v] -B dev\n", progname);
        fprintf(stderr, "       %s [-v] -c config_file dev\n", progname);
        fprintf(stderr, "       %s [-v] -C config_file dev\n", progname);
@@ -1044,6 +1170,9 @@
        fprintf(stderr, "       %s [-v] -G dev\n", progname);
        fprintf(stderr, "       %s [-v] -i dev\n", progname);
        fprintf(stderr, "       %s [-v] -I serial_number dev\n", progname);
+       fprintf(stderr, "       %s [-v] -m dev\n", progname);
+       fprintf(stderr, "       %s [-v] -M [yes | no | set params] dev\n",
+           progname);
        fprintf(stderr, "       %s [-v] -p dev\n", progname);
        fprintf(stderr, "       %s [-v] -P dev\n", progname);
        fprintf(stderr, "       %s [-v] -r component dev\n", progname); 
diff -r f8a0fdb19ca5 -r c3151a70b623 sys/dev/raidframe/files.raidframe
--- a/sys/dev/raidframe/files.raidframe Tue Nov 17 18:44:33 2009 +0000
+++ b/sys/dev/raidframe/files.raidframe Tue Nov 17 18:54:26 2009 +0000
@@ -1,4 +1,4 @@
-#      $NetBSD: files.raidframe,v 1.7 2009/01/18 16:37:19 christos Exp $
+#      $NetBSD: files.raidframe,v 1.8 2009/11/17 18:54:26 jld Exp $
 
 defflag                        RAID_AUTOCONFIG
 defflag                        RAID_DIAGNOSTIC
@@ -42,6 +42,7 @@
 file   dev/raidframe/rf_paritylogDiskMgr.c     raid
 file   dev/raidframe/rf_paritylogging.c        raid
 file   dev/raidframe/rf_parityloggingdags.c    raid
+file   dev/raidframe/rf_paritymap.c            raid



Home | Main Index | Thread Index | Old Index