Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src New ioctl DIOCGSECTORALIGN returns sector alignment parameters.



details:   https://anonhg.NetBSD.org/src/rev/d840cf9a1cfb
branches:  trunk
changeset: 745454:d840cf9a1cfb
user:      riastradh <riastradh%NetBSD.org@localhost>
date:      Mon Mar 02 16:01:56 2020 +0000

description:
New ioctl DIOCGSECTORALIGN returns sector alignment parameters.

struct disk_sectoralign {
        /* First aligned sector number.  */
        uint32_t dsa_firstaligned;

        /* Number of sectors per aligned unit.  */
        uint32_t dsa_alignment;
};

- Teach wd(4) to get it from ATA.
- Teach cgd(4) to pass it through from the underlying disk.
- Teach dk(4) to pass it through with adjustments.
- Teach zpool (zfs) to take advantage of it.
  => XXX zpool doesn't seem to understand when the vdev's starting
     sector is misaligned.

Missing:

- ccd(4) and raidframe(4) support -- these should support _using_
  DIOCGSECTORALIGN to decide where to start putting ccd or raid
  stripes on disk, and these should perhaps _implement_
  DIOCGSECTORALIGN by reporting the stripe/interleave factor.

- sd(4) support -- I don't know any obvious way to get it from SCSI,
  but if any SCSI wizards know better than I, please feel free to
  teach sd(4) about it!

- any ld(4) attachments -- might be worth teaching the ld drivers for
  nvme and various raid controllers to get the aligned sector size.

There's some duplicate logic here for now.  I'm doing it this way,
rather than gathering the logic into a new disklabel_sectoralign
function or something, so that this change is limited to adding a new
ioctl, without any new kernel symbols, in order to make it easy to
pull up to netbsd-9 without worrying about the module ABI.

diffstat:

 external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c |  19 ++++++-
 sys/dev/ata/wd.c                                       |  51 ++++++++++++++++-
 sys/dev/ata/wdvar.h                                    |   3 +-
 sys/dev/cgd.c                                          |  31 ++++++++++-
 sys/dev/dkwedge/dk.c                                   |  21 ++++++-
 sys/sys/disk.h                                         |   8 ++-
 sys/sys/dkio.h                                         |   5 +-
 7 files changed, 127 insertions(+), 11 deletions(-)

diffs (275 lines):

diff -r 39e5dfdcdead -r d840cf9a1cfb external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c    Mon Mar 02 16:01:52 2020 +0000
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c    Mon Mar 02 16:01:56 2020 +0000
@@ -151,6 +151,7 @@
        unsigned secsize;
        struct disk *pdk;
        struct dkwedge_info dkw;
+       struct disk_sectoralign dsa;
 
        /*
         * We must have a pathname, and it must be absolute.
@@ -260,7 +261,23 @@
        *max_psize = *psize;
 
        *ashift = highbit(MAX(secsize, SPA_MINBLOCKSIZE)) - 1;
-       *pashift = *ashift;
+
+       /*
+        * Try to determine whether the disk has a preferred physical
+        * sector size even if it can emulate a smaller logical sector
+        * size with r/m/w cycles, e.g. a disk with 4096-byte sectors
+        * that for compatibility claims to support 512-byte ones.
+        */
+       if (VOP_IOCTL(vp, DIOCGSECTORALIGN, &dsa, FREAD, NOCRED) == 0) {
+               *pashift = highbit(dsa.dsa_alignment * secsize) - 1;
+               if (dsa.dsa_firstaligned % dsa.dsa_alignment)
+                       printf("ZFS WARNING: vdev %s: sectors are misaligned"
+                           " (alignment=%"PRIu32", firstaligned=%"PRIu32")\n",
+                           vd->vdev_path,
+                           dsa.dsa_alignment, dsa.dsa_firstaligned);
+       } else {
+               *pashift = *ashift;
+       }
 
        vd->vdev_wholedisk = 0;
        if (getdiskinfo(vp, &dkw) != 0 &&
diff -r 39e5dfdcdead -r d840cf9a1cfb sys/dev/ata/wd.c
--- a/sys/dev/ata/wd.c  Mon Mar 02 16:01:52 2020 +0000
+++ b/sys/dev/ata/wd.c  Mon Mar 02 16:01:56 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: wd.c,v 1.458 2020/03/01 03:21:54 riastradh Exp $ */
+/*     $NetBSD: wd.c,v 1.459 2020/03/02 16:01:56 riastradh Exp $ */
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
@@ -54,7 +54,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.458 2020/03/01 03:21:54 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.459 2020/03/02 16:01:56 riastradh Exp $");
 
 #include "opt_ata.h"
 #include "opt_wd.h"
@@ -427,16 +427,40 @@
        } else {
                wd->sc_blksize = 512;
        }
+       wd->sc_sectoralign.dsa_firstaligned = 0;
+       wd->sc_sectoralign.dsa_alignment = 1;
+       if ((wd->sc_params.atap_secsz & ATA_SECSZ_VALID_MASK) == ATA_SECSZ_VALID
+           && ((wd->sc_params.atap_secsz & ATA_SECSZ_LPS) != 0)) {
+               wd->sc_sectoralign.dsa_alignment = 1 <<
+                   (wd->sc_params.atap_secsz & ATA_SECSZ_LPS_SZMSK);
+               if ((wd->sc_params.atap_logical_align & ATA_LA_VALID_MASK) ==
+                   ATA_LA_VALID) {
+                       wd->sc_sectoralign.dsa_firstaligned =
+                           (wd->sc_sectoralign.dsa_alignment -
+                               (wd->sc_params.atap_logical_align &
+                                   ATA_LA_MASK));
+               }
+       }
        wd->sc_capacity512 = (wd->sc_capacity * wd->sc_blksize) / DEV_BSIZE;
        format_bytes(pbuf, sizeof(pbuf), wd->sc_capacity * wd->sc_blksize);
        aprint_normal_dev(self, "%s, %d cyl, %d head, %d sec, "
-           "%d bytes/sect x %llu sectors\n",
+           "%d bytes/sect x %llu sectors",
            pbuf,
            (wd->sc_flags & WDF_LBA) ? (int)(wd->sc_capacity /
                (wd->sc_params.atap_heads * wd->sc_params.atap_sectors)) :
                wd->sc_params.atap_cylinders,
            wd->sc_params.atap_heads, wd->sc_params.atap_sectors,
            wd->sc_blksize, (unsigned long long)wd->sc_capacity);
+       /* Physical sector size in bytes: logical sectors per aligned */
+       /* unit (dsa_alignment) times the logical sector size (sc_blksize). */
+       if (wd->sc_sectoralign.dsa_alignment != 1) {
+               aprint_normal(" (%d bytes/physsect",
+                   wd->sc_sectoralign.dsa_alignment * wd->sc_blksize);
+               if (wd->sc_sectoralign.dsa_firstaligned != 0)
+                       aprint_normal("; first aligned sector: %jd",
+                           (intmax_t)wd->sc_sectoralign.dsa_firstaligned);
+               aprint_normal(")");
+       }
+       aprint_normal("\n");
 
        ATADEBUG_PRINT(("%s: atap_dmatiming_mimi=%d, atap_dmatiming_recom=%d\n",
            device_xname(self), wd->sc_params.atap_dmatiming_mimi,
@@ -1406,6 +1430,27 @@
                return(error1);
                }
 
+       case DIOCGSECTORALIGN: {
+               struct disk_sectoralign *dsa = addr;
+               int part = WDPART(dev);
+
+               *dsa = wd->sc_sectoralign;
+               if (part != RAW_PART) {
+                       struct disklabel *lp = dksc->sc_dkdev.dk_label;
+                       daddr_t offset = lp->d_partitions[part].p_offset;
+                       uint32_t r = offset % dsa->dsa_alignment;
+
+                       if (r < dsa->dsa_firstaligned)
+                               dsa->dsa_firstaligned = dsa->dsa_firstaligned
+                                   - r;
+                       else
+                               dsa->dsa_firstaligned = (dsa->dsa_firstaligned
+                                   + dsa->dsa_alignment) - r;
+               }
+
+               return 0;
+       }
+
        default:
                return dk_ioctl(dksc, dev, cmd, addr, flag, l);
        }
diff -r 39e5dfdcdead -r d840cf9a1cfb sys/dev/ata/wdvar.h
--- a/sys/dev/ata/wdvar.h       Mon Mar 02 16:01:52 2020 +0000
+++ b/sys/dev/ata/wdvar.h       Mon Mar 02 16:01:56 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: wdvar.h,v 1.49 2019/04/05 18:23:45 bouyer Exp $        */
+/*     $NetBSD: wdvar.h,v 1.50 2020/03/02 16:01:56 riastradh Exp $     */
 
 /*
  * Copyright (c) 1998, 2001 Manuel Bouyer.
@@ -59,6 +59,7 @@
        uint64_t sc_capacity512; /* ... in DEV_BSIZE blocks */
        uint32_t sc_capacity28; /* capacity accessible with LBA28 commands */
        uint32_t sc_blksize; /* logical block size, in bytes */
+       struct disk_sectoralign sc_sectoralign; /* sector alignment */
 
 #ifdef WD_SOFTBADSECT
        SLIST_HEAD(, disk_badsectors)   sc_bslist;
diff -r 39e5dfdcdead -r d840cf9a1cfb sys/dev/cgd.c
--- a/sys/dev/cgd.c     Mon Mar 02 16:01:52 2020 +0000
+++ b/sys/dev/cgd.c     Mon Mar 02 16:01:56 2020 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: cgd.c,v 1.120 2020/03/01 03:21:54 riastradh Exp $ */
+/* $NetBSD: cgd.c,v 1.121 2020/03/02 16:01:56 riastradh Exp $ */
 
 /*-
  * Copyright (c) 2002 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cgd.c,v 1.120 2020/03/01 03:21:54 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cgd.c,v 1.121 2020/03/02 16:01:56 riastradh Exp $");
 
 #include <sys/types.h>
 #include <sys/param.h>
@@ -716,6 +716,33 @@
                 * We pass this call down to the underlying disk.
                 */
                return VOP_IOCTL(cs->sc_tvn, cmd, data, flag, l->l_cred);
+       case DIOCGSECTORALIGN: {
+               struct disk_sectoralign *dsa = data;
+               int error;
+
+               if (!DK_ATTACHED(dksc))
+                       return ENOENT;
+
+               /* Get the underlying disk's sector alignment.  */
+               error = VOP_IOCTL(cs->sc_tvn, cmd, data, flag, l->l_cred);
+               if (error)
+                       return error;
+
+               /* Adjust for the disklabel partition if necessary.  */
+               if (part != RAW_PART) {
+                       struct disklabel *lp = dksc->sc_dkdev.dk_label;
+                       daddr_t offset = lp->d_partitions[part].p_offset;
+                       uint32_t r = offset % dsa->dsa_alignment;
+
+                       if (r < dsa->dsa_firstaligned)
+                               dsa->dsa_firstaligned = dsa->dsa_firstaligned
+                                   - r;
+                       else
+                               dsa->dsa_firstaligned = (dsa->dsa_firstaligned
+                                   + dsa->dsa_alignment) - r;
+               }
+               return 0;
+       }
        case DIOCGSTRATEGY:
        case DIOCSSTRATEGY:
                if (!DK_ATTACHED(dksc))
diff -r 39e5dfdcdead -r d840cf9a1cfb sys/dev/dkwedge/dk.c
--- a/sys/dev/dkwedge/dk.c      Mon Mar 02 16:01:52 2020 +0000
+++ b/sys/dev/dkwedge/dk.c      Mon Mar 02 16:01:56 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: dk.c,v 1.99 2020/03/01 03:19:46 riastradh Exp $        */
+/*     $NetBSD: dk.c,v 1.100 2020/03/02 16:01:56 riastradh Exp $       */
 
 /*-
  * Copyright (c) 2004, 2005, 2006, 2007 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.99 2020/03/01 03:19:46 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: dk.c,v 1.100 2020/03/02 16:01:56 riastradh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_dkwedge.h"
@@ -1503,7 +1503,24 @@
 
                break;
            }
+       case DIOCGSECTORALIGN:
+           {
+               struct disk_sectoralign *dsa = data;
+               uint32_t r;
 
+               error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
+                   l != NULL ? l->l_cred : NOCRED);
+               if (error)
+                       break;
+
+               r = sc->sc_offset % dsa->dsa_alignment;
+               if (r < dsa->dsa_firstaligned)
+                       dsa->dsa_firstaligned = dsa->dsa_firstaligned - r;
+               else
+                       dsa->dsa_firstaligned = (dsa->dsa_firstaligned +
+                           dsa->dsa_alignment) - r;
+               break;
+           }
        default:
                error = ENOTTY;
        }
diff -r 39e5dfdcdead -r d840cf9a1cfb sys/sys/disk.h
--- a/sys/sys/disk.h    Mon Mar 02 16:01:52 2020 +0000
+++ b/sys/sys/disk.h    Mon Mar 02 16:01:56 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: disk.h,v 1.74 2019/05/22 08:47:02 hannken Exp $        */
+/*     $NetBSD: disk.h,v 1.75 2020/03/02 16:01:56 riastradh Exp $      */
 
 /*-
  * Copyright (c) 1996, 1997, 2004 The NetBSD Foundation, Inc.
@@ -300,6 +300,12 @@
        size_t dks_paramlen;            /* notyet; should be 0 */
 };
 
+/* Sector alignment parameters, as returned by DIOCGSECTORALIGN */
+struct disk_sectoralign {
+       uint32_t        dsa_firstaligned; /* first aligned sector # */
+       uint32_t        dsa_alignment;    /* sectors per aligned unit */
+};
+
 #ifdef _KERNEL
 #include <sys/device.h>
 #include <sys/mutex.h>
diff -r 39e5dfdcdead -r d840cf9a1cfb sys/sys/dkio.h
--- a/sys/sys/dkio.h    Mon Mar 02 16:01:52 2020 +0000
+++ b/sys/sys/dkio.h    Mon Mar 02 16:01:56 2020 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: dkio.h,v 1.25 2019/03/27 19:13:33 martin Exp $ */
+/*     $NetBSD: dkio.h,v 1.26 2020/03/02 16:01:56 riastradh Exp $      */
 
 /*
  * Copyright (c) 1987, 1988, 1993
@@ -133,4 +133,7 @@
                /* mass removal */
 #define        DIOCRMWEDGES    _IOR('d', 134, int)     /* remove all wedges */
 
+               /* sector alignment */
+#define        DIOCGSECTORALIGN _IOR('d', 135, struct disk_sectoralign)
+
 #endif /* _SYS_DKIO_H_ */



Home | Main Index | Thread Index | Old Index