Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src Implement (and document!) "dkctl <dev> badsector .." to `man...



details:   https://anonhg.NetBSD.org/src/rev/f0bff2b52a43
branches:  trunk
changeset: 545714:f0bff2b52a43
user:      darrenr <darrenr%NetBSD.org@localhost>
date:      Tue Apr 15 18:27:26 2003 +0000

description:
Implement (and document!) "dkctl <dev> badsector .." to `manage' the bad
sector information in the kernel.  Doing this uncovered some shortcomings
that should have been pretty obvious with the code committed prior, addressing
the major kludge with a new struct - disk_badsecinfo to be passed into
DIOCBSLIST.

diffstat:

 sbin/dkctl/dkctl.8  |   17 +++++-
 sbin/dkctl/dkctl.c  |  156 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 sys/dev/ata/wd.c    |   48 ++++++++++++---
 sys/dev/ata/wdvar.h |    3 +-
 sys/sys/disk.h      |   14 +++-
 sys/sys/dkio.h      |    4 +-
 6 files changed, 223 insertions(+), 19 deletions(-)

diffs (truncated from 404 to 300 lines):

diff -r 58158f8b3397 -r f0bff2b52a43 sbin/dkctl/dkctl.8
--- a/sbin/dkctl/dkctl.8        Tue Apr 15 17:42:44 2003 +0000
+++ b/sbin/dkctl/dkctl.8        Tue Apr 15 18:27:26 2003 +0000
@@ -1,4 +1,4 @@
-.\"    $NetBSD: dkctl.8,v 1.4 2002/10/01 13:40:29 wiz Exp $
+.\"    $NetBSD: dkctl.8,v 1.5 2003/04/15 18:27:28 darrenr Exp $
 .\"
 .\" Copyright 2002 Wasabi Systems, Inc.
 .\" All rights reserved.
@@ -101,6 +101,21 @@
 is specified, drop if
 .Ar no
 is specified.)
+.Pp
+.Nm badsector
+.Ar flush | list | retry
+.Pp
+Used for managing the kernel's bad sector list for wd* devices.
+.Bl -tag -width indent
+.It flush
+Clears the in kernel list of bad sectors.
+.It list
+Prints out the list of bad sector ranges recorded by the kernel.
+.It retry
+Flushes the in kernel list and then retries all of the previously recorded
+bad sectors, causing the list to self update.
+.El
+.Pp
 .Sh SEE ALSO
 .Xr ioctl 2 ,
 .Xr sd 4 ,
diff -r 58158f8b3397 -r f0bff2b52a43 sbin/dkctl/dkctl.c
--- a/sbin/dkctl/dkctl.c        Tue Apr 15 17:42:44 2003 +0000
+++ b/sbin/dkctl/dkctl.c        Tue Apr 15 18:27:26 2003 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: dkctl.c,v 1.2 2002/07/01 18:49:57 yamt Exp $   */
+/*     $NetBSD: dkctl.c,v 1.3 2003/04/15 18:27:28 darrenr Exp $        */
 
 /*
  * Copyright 2001 Wasabi Systems, Inc.
@@ -42,6 +42,8 @@
 #include <sys/param.h>
 #include <sys/ioctl.h>
 #include <sys/dkio.h>
+#include <sys/disk.h>
+#include <sys/queue.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
@@ -59,6 +61,10 @@
 #define        NO_STR  "no"
 #define YESNO_ARG      YES_STR " | " NO_STR
 
+#ifndef PRIdaddr
+#define PRIdaddr PRId64
+#endif
+
 struct command {
        const char *cmd_name;
        const char *arg_names;
@@ -81,6 +87,7 @@
 void   disk_setcache(int, char *[]);
 void   disk_synccache(int, char *[]);
 void   disk_keeplabel(int, char *[]);
+void   disk_badsectors(int, char *[]);
 
 struct command commands[] = {
        { "getcache",
@@ -103,6 +110,11 @@
          disk_keeplabel,
          O_RDWR },
 
+       { "badsector",
+         "flush | list | retry",
+          disk_badsectors,
+          O_RDWR },
+
        { NULL,
          NULL,
          NULL,
@@ -261,6 +273,148 @@
                err(1, "%s: keep label", dvname);
 }
 
+
+void
+disk_badsectors(int argc, char *argv[])
+{
+       struct disk_badsectors *dbs, *dbs2, buffer[200];
+       SLIST_HEAD(, disk_badsectors) dbstop;
+       struct disk_badsecinfo dbsi;
+       daddr_t blk, totbad, bad;
+       u_int32_t count, size;
+       struct stat sb;
+       u_char *block;
+
+       if (argc != 1)
+               usage();
+
+       if (strcmp(argv[0], "list") == 0) {
+               /*
+                * Copy the list of kernel bad sectors out in chunks that fit
+                * into buffer[].  Updating dbsi_skip means we don't sit here
+                * forever only getting the first chunk that fit in buffer[].
+                */
+               dbsi.dbsi_buffer = (caddr_t)buffer;
+               dbsi.dbsi_bufsize = sizeof(buffer);
+               dbsi.dbsi_skip = 0;
+               dbsi.dbsi_copied = 0;
+               dbsi.dbsi_left = 0;
+
+               do {
+                       if (ioctl(fd, DIOCBSLIST, (caddr_t)&dbsi) == -1)
+                               err(1, "%s: badsectors list", dvname);
+
+                       dbs = (struct disk_badsectors *)dbsi.dbsi_buffer;
+                       for (count = dbsi.dbsi_copied; count > 0; count--) {
+                               printf("%s: blocks %d - %d failed at %s",
+                                       dvname, dbs->dbs_min, dbs->dbs_max,
+                                       ctime(&dbs->dbs_failedat.tv_sec));
+                       }
+                       dbsi.dbsi_skip += dbsi.dbsi_copied;
+               } while (dbsi.dbsi_left != 0);
+
+       } else if (strcmp(argv[0], "flush") == 0) {
+               if (ioctl(fd, DIOCBSFLUSH) == -1)
+                       err(1, "%s: badsectors flush", dvname);
+
+       } else if (strcmp(argv[0], "retry") == 0) {
+               /*
+                * Enforce use of raw device here because the block device
+                * causes access to blocks to be clustered in a larger group,
+                * making it impossible to determine which individual sectors
+                * are the cause of a problem.
+                */ 
+               if (fstat(fd, &sb) == -1)
+                       err(1, "fstat");
+
+               if (!S_ISCHR(sb.st_mode)) {
+                       fprintf(stderr, "'badsector retry' must be used %s\n",
+                               "with character device");
+                       exit(1);
+               }
+
+               SLIST_INIT(&dbstop);
+
+               /*
+                * Build up a copy of the in-kernel list in a number of stages.
+                * That the list we build up here is in the reverse order to
+                * the kernel's is of no concern.
+                */
+               dbsi.dbsi_buffer = (caddr_t)buffer;
+               dbsi.dbsi_bufsize = sizeof(buffer);
+               dbsi.dbsi_skip = 0;
+               dbsi.dbsi_copied = 0;
+               dbsi.dbsi_left = 0;
+
+               do {
+                       if (ioctl(fd, DIOCBSLIST, (caddr_t)&dbsi) == -1)
+                               err(1, "%s: badsectors list", dvname);
+
+                       dbs = (struct disk_badsectors *)dbsi.dbsi_buffer;
+                       for (count = dbsi.dbsi_copied; count > 0; count--) {
+                               dbs2 = malloc(sizeof(*dbs2));
+                               *dbs2 = *dbs;
+                               SLIST_INSERT_HEAD(&dbstop, dbs2, dbs_next);
+                       }
+                       dbsi.dbsi_skip += dbsi.dbsi_copied;
+               } while (dbsi.dbsi_left != 0);
+
+               /*
+                * Just calculate and print out something that will hopefully
+                * provide some useful information about what's going to take
+                * place next (if anything.)
+                */
+               bad = 0;
+               totbad = 0;
+               block = calloc(1, DEV_BSIZE);
+               SLIST_FOREACH(dbs, &dbstop, dbs_next) {
+                       bad++;
+                       totbad += dbs->dbs_max - dbs->dbs_min + 1;
+               }
+
+               printf("%s: bad sector clusters %"PRIdaddr
+                   " total sectors %"PRIdaddr"\n", dvname, bad, totbad);
+
+               /*
+                * Clear out the kernel's list of bad sectors, ready for us
+                * to test all those it thought were bad.
+                */
+               if (ioctl(fd, DIOCBSFLUSH) == -1)
+                       err(1, "%s: badsectors flush", dvname);
+
+               printf("%s: bad sectors flushed\n", dvname);
+
+               /*
+                * For each entry we obtained from the kernel, retry each
+                * individual sector recorded as bad by seeking to it and
+                * attempting to read it in.  Print out a line item for each
+                * bad block we verify.
+                *
+                * PRIdaddr is used here because the type of dbs_max is daddr_t
+                * and that may be either a 32bit or 64bit number(!)
+                */
+               SLIST_FOREACH(dbs, &dbstop, dbs_next) {
+                       printf("%s: Retrying %"PRIdaddr" - %"
+                           PRIdaddr"\n", dvname, dbs->dbs_min, dbs->dbs_max);
+
+                       for (blk = dbs->dbs_min; blk <= dbs->dbs_max; blk++) {
+                               if (lseek(fd, (off_t)blk * DEV_BSIZE,
+                                   SEEK_SET) == -1) {
+                                       warn("%s: lseek block %d", dvname,
+                                            blk);
+                                       continue;
+                               }
+                               printf("%s: block %"PRIdaddr" - ", dvname, blk);
+                               if (read(fd, block, DEV_BSIZE) != DEV_BSIZE)
+                                       printf("failed\n");
+                               else
+                                       printf("ok\n");
+                               fflush(stdout);
+                       }
+               }
+       }
+}
+
 /*
  * return YES, NO or -1.
  */
diff -r 58158f8b3397 -r f0bff2b52a43 sys/dev/ata/wd.c
--- a/sys/dev/ata/wd.c  Tue Apr 15 17:42:44 2003 +0000
+++ b/sys/dev/ata/wd.c  Tue Apr 15 18:27:26 2003 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: wd.c,v 1.242 2003/04/15 17:42:44 dogcow Exp $ */
+/*     $NetBSD: wd.c,v 1.243 2003/04/15 18:27:26 darrenr Exp $ */
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.242 2003/04/15 17:42:44 dogcow Exp $");
+__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.243 2003/04/15 18:27:26 darrenr Exp $");
 
 #ifndef WDCDEBUG
 #define WDCDEBUG
@@ -430,6 +430,7 @@
                SLIST_REMOVE_HEAD(&sc->sc_bslist, dbs_next);
                free(head, M_TEMP);
        }
+       sc->sc_bscount = 0;
 
        /* locate the major number */
        bmaj = bdevsw_lookup_major(&wd_bdevsw);
@@ -543,10 +544,11 @@
         */
        if (__predict_false(!SLIST_EMPTY(&wd->sc_bslist))) {
                struct disk_badsectors *dbs;
-               daddr_t maxblk = blkno + bp->b_bcount;
+               daddr_t maxblk = blkno + (bp->b_bcount / DEV_BSIZE) - 1;
 
                SLIST_FOREACH(dbs, &wd->sc_bslist, dbs_next)
-                       if (dbs->dbs_min >= blkno && dbs->dbs_max < maxblk)
+                       if ((dbs->dbs_min <= blkno && blkno <= dbs->dbs_max) ||
+                           (dbs->dbs_min <= maxblk && maxblk <= dbs->dbs_max)
                                goto bad;
        }
 
@@ -786,6 +788,7 @@
                        dbs->dbs_max = dbs->dbs_min + bp->b_bcount - 1;
                        microtime(&dbs->dbs_failedat);
                        SLIST_INSERT_HEAD(&wd->sc_bslist, dbs, dbs_next);
+                       wd->sc_bscount++;
                }
 
                bp->b_flags |= B_ERROR;
@@ -1183,23 +1186,45 @@
 
        case DIOCBSLIST :
        {
-               caddr_t laddr = *(caddr_t *)addr;
+               u_int32_t count, missing, skip;
+               struct disk_badsecinfo dbsi;
                struct disk_badsectors *dbs;
                size_t available;
-               u_int32_t count;
+               caddr_t laddr;
 
+               dbsi = *(struct disk_badsecinfo *)addr;
+               missing = wd->sc_bscount;
                count = 0;
-               copyin(laddr, &available, sizeof(available));
-               laddr += sizeof(count);
+               available = dbsi.dbsi_bufsize;
+               skip = dbsi.dbsi_skip;
+               laddr = dbsi.dbsi_buffer;
+
+               /*
+                * We start this loop with the expectation that all of the



Home | Main Index | Thread Index | Old Index