Subject: Re: Supporting sector size != DEV_BSIZE -- patches
To: Trevin Beattie <trevin@xmission.com>
From: Bill Studenmund <wrstuden@netbsd.org>
List: tech-kern
Date: 06/03/2002 12:49:34
On Sat, 1 Jun 2002, Trevin Beattie wrote:

> I've implemented a fix for the ffs filesystem so that all upper-level disk
> operations use DEV_BSIZE blocks, and the actual sector size is left to the
> physical device driver (below the buffer cache).  I've successfully
> created, mounted, and written to a virtual disk using 2048-byte sectors.
> Next thing I need to do is run the patched NetBSD on real hardware (I've
> been using VMware for testing :-) and see if it will work with a SCSI MO
> drive.

I don't think that's the best direction to go. I think it would be better
to do everything in terms of underlying disk blocks. Mainly since physical
block size is a property of the drive, so if you take a drive to another
machine, it will work. If we depend on DEV_BSIZE (and I'm understanding
you right), then different kernels will look in different places.

The main locus of the question is what have people been doing so far when
they use non-512-byte sectors? Yes, we have hacks for CDs. But what have
the MO folks been doing? Any MO users who can speak up?

Also, what does FreeBSD do? We've worked hard to keep our on-disk format
the same. Will this mean changing it?

I think whatever we do should be backwards compatable with them, if
possible.

> Below are the changes I've made.  (The original files are from
> NetBSD-current of 25-May-2002.)  It appears that the changes made to the
> ffs driver may also need to be made in the lfs and ext2fs drivers, but
> since I really don't know how those filesystems were designed, I'll leave
> them to the experts.

Oh, please be careful, your mailer mangled them (it wrapped lines for
you).

> diff -u /usr/src/sbin/newfs/mkfs.c /home/trevin/src/sbin/newfs/mkfs.c
> ---
> /usr/src/sbin/newfs/mkfs.c	Thu Apr 11 04:17:30 2002
> +++
> /home/trevin/src/sbin/newfs/mkfs.c	Sat Jun 01 19:23:54 2002
> @@ -125,11
> +125,11 @@
>      mode_t mfsmode, uid_t mfsuid, gid_t mfsgid)
>  {
>  	int32_t i,
> mincpc, mincpg, inospercg;
> -	int32_t cylno, rpos, blk, j, warning = 0;
> +
> int32_t cylno, /* rpos, blk, */ j, warning = 0; /* XXX */
>  	int32_t used,
> mincpgcnt, bpcg;
>  	off_t usedb;
>  	int32_t mapcramped, inodecramped;
> -
> int32_t postblsize, rotblsize, totalsbsize;
> +	/* int32_t postblsize,
> rotblsize, totalsbsize; */ /* XXX */
>  	time_t utime;
>  	long long sizepb;
>
> char *writebuf2;		/* dynamic buffer */
> @@ -141,9 +141,9 @@
>  #ifdef MFS
>  	if
> (mfs) {
>  		calc_memfree();
> -		if (fssize * sectorsize > memleft)
> -			fssize
> = memleft / sectorsize;
> -		if ((membase = mkfs_malloc(fssize * sectorsize))
> == 0)
> +		if (fssize * DEV_BSIZE > memleft)
> +			fssize = memleft /
> DEV_BSIZE;
> +		if ((membase = mkfs_malloc(fssize * DEV_BSIZE)) == 0)
>
> exit(12);
>  	}
>  #endif
> @@ -162,7 +162,7 @@
>  	 */
>  	if (fssize <= 0)
>
> printf("preposterous size %d\n", fssize), exit(13);
> -	wtfs(fssize - 1,
> sectorsize, (char *)&sblock);
> +	wtfs(fssize - sectorsize / DEV_BSIZE,
> sectorsize, (char *)&sblock);
>
>  	/*
>  	 * collect and verify the sector and
> track info
> @@ -240,16 +240,23 @@
>  	sblock.fs_nrpos = nrpos;
>
> sblock.fs_nindir = sblock.fs_bsize / sizeof(daddr_t);
>  	sblock.fs_inopb =
> sblock.fs_bsize / DINODE_SIZE;
> -	sblock.fs_nspf = sblock.fs_fsize /
> sectorsize;
> +	sblock.fs_nspf = sblock.fs_fsize / DEV_BSIZE;
>  	for
> (sblock.fs_fsbtodb = 0, i = NSPF(&sblock); i > 1; i >>= 1)
>
> sblock.fs_fsbtodb++;
> +	/* Reserve space for the boot block and primary
> super block.
> +	   The backup super block follows. */
>  	sblock.fs_sblkno =
>
>     roundup(howmany(bbsize + sbsize, sblock.fs_fsize), sblock.fs_frag);
> +
> /* Allocate enough full blocks to store the backup super block */
>
> sblock.fs_cblkno = (daddr_t)(sblock.fs_sblkno +
>
> roundup(howmany(sbsize, sblock.fs_fsize), sblock.fs_frag));
> +	/* Allocate 1
> full block for the cylinder group block */
>  	sblock.fs_iblkno =
> sblock.fs_cblkno + sblock.fs_frag;
> +	/* Set the cylinder group offset to
> the number of frags per track,
> +	   rounded up to the next full block
> boundary. */
>  	sblock.fs_cgoffset = roundup(
> -	    howmany(sblock.fs_nsect,
> NSPF(&sblock)), sblock.fs_frag);
> +	    howmany(sblock.fs_nsect * sectorsize
> / DEV_BSIZE, NSPF(&sblock)),
> +	    sblock.fs_frag);
>  	for (sblock.fs_cgmask
> = 0xffffffff, i = sblock.fs_ntrak; i > 1; i >>= 1)
>  		sblock.fs_cgmask <<=
> 1;
>  	if (!POWEROF2(sblock.fs_ntrak))
> @@ -260,19 +267,23 @@
>
> sblock.fs_maxfilesize += sizepb;
>  	}
>  	/*
> -	 * Validate
> specified/determined secpercyl
> -	 * and calculate minimum cylinders per
> group.
> +	 * Calculate DEV_BSIZE blocks per cylinder
> +	 * and minimum
> cylinders per group.
>  	 */
> -	sblock.fs_spc = secpercyl;
> +	bpcg =
> sblock.fs_nsect * sblock.fs_ntrak * sectorsize;
> +	sblock.fs_spc = bpcg /
> DEV_BSIZE;
>  	for (sblock.fs_cpc = NSPB(&sblock), i = sblock.fs_spc;
>
> sblock.fs_cpc > 1 && (i & 1) == 0;
>  	     sblock.fs_cpc >>= 1, i >>= 1)
>
> /* void */;
>  	mincpc = sblock.fs_cpc;
> -	bpcg = sblock.fs_spc * sectorsize;
>
> 	inospercg = roundup(bpcg / DINODE_SIZE, INOPB(&sblock));
>  	if (inospercg >
> MAXIPG(&sblock))
>  		inospercg = MAXIPG(&sblock);
> +	/*
> +	 * Calculate the
> number of DEV_BSIZE blocks used in a cylinder
> +	 * group by the super
> block, cylinder block, and inode blocks.
> +	 */
>  	used = (sblock.fs_iblkno +
> inospercg / INOPF(&sblock)) * NSPF(&sblock);
>  	mincpgcnt =
> howmany(sblock.fs_cgoffset * (~sblock.fs_cgmask) + used,
>
> sblock.fs_spc);
> @@ -419,6 +430,8 @@
>  	/*
>  	 * Now have size for file system
> and nsect and ntrak.
>  	 * Determine number of cylinders and blocks in the
> file system.
> +	 * WARNING: At this point, the units of fssize changes
> +	 *
>    from DEV_BSIZE blocks to fragments.
>  	 */
>  	sblock.fs_size = fssize =
> dbtofsb(&sblock, fssize);
>  	sblock.fs_ncyl = fssize * NSPF(&sblock) /
> sblock.fs_spc;
> @@ -446,6 +459,12 @@
>  	sblock.fs_npsect = nphyssectors;
>
> sblock.fs_postblformat = FS_DYNAMICPOSTBLFMT;
>  	sblock.fs_sbsize =
> fragroundup(&sblock, sizeof(struct fs));
> +	/*
> +	 * Rotational tables have
> been removed in favor of simplicity.
> +	 */

Uhm, that's not something we should be seeing in a diff dealing with block
sizes. While we may be intreested in dropping rotational table support,
the change shouldn't just slip in.

> +#if 1
> +	sblock.fs_cpc =
> 0;
> +#else /* 0 */
>  	if (sblock.fs_ntrak == 1) {
>  		sblock.fs_cpc = 0;
>
> goto next;
> @@ -495,6 +514,7 @@
>  		fs_postbl(&sblock, cylno)[rpos] = blk;
>
> }
>  next:
> +#endif /* Removed rotational tables */
>  	/*
>  	 * Compute/validate
> number of cylinder groups.
>  	 */
> @@ -516,9 +536,10 @@
>  	if ((i = fssize - j
> * sblock.fs_fpg) < sblock.fs_fpg &&
>  	    cgdmin(&sblock, j) -
> cgbase(&sblock, j) > i) {
>  		if (j == 0) {
> -			printf("File system must
> have at least %d sectors\n",
> +			printf("File system must be at least %d
> Kb\n",
>  			    NSPF(&sblock) *
> -			    (cgdmin(&sblock, 0) + (3 <<
> sblock.fs_fragshift)));
> +			    (cgdmin(&sblock, 0) + (3 <<
> sblock.fs_fragshift))
> +			    * DEV_BSIZE / 1024);
>  			exit(30);
>  		}
>
> printf("Warning: inode blocks/cyl group (%d) >= "
> @@ -528,7 +549,7 @@
>
>  i >> sblock.fs_fragshift);
>  		printf("    cylinder group. This implies %d
> sector(s) "
>  			"cannot be allocated.\n",
> -		    i * NSPF(&sblock));
> +
> i * NSPF(&sblock) * DEV_BSIZE / sectorsize);
>  		sblock.fs_ncg--;
>
> sblock.fs_ncyl -= sblock.fs_ncyl % sblock.fs_cpg;
>  		sblock.fs_size =
> fssize = sblock.fs_ncyl * sblock.fs_spc /
> @@ -536,10 +557,10 @@
>  		warning
> = 0;
>  	}
>  	if (warning && !mfs) {
> -		printf("Warning: %d sector(s) in last
> cylinder unallocated\n",
> -		    sblock.fs_spc -
> -		    (fssize *
> NSPF(&sblock) - (sblock.fs_ncyl - 1)
> -		    * sblock.fs_spc));
> +
> printf("Warning: %d Kb in last cylinder unallocated\n",
> +
> (sblock.fs_spc -
> +		     (fssize * NSPF(&sblock) - (sblock.fs_ncyl - 1)
> +
>      * sblock.fs_spc)) * DEV_BSIZE / 1024);
>  	}
>  	/*
>  	 * fill in remaining
> fields of the super block
> @@ -578,9 +599,10 @@
>  	 * Dump out summary
> information about file system.
>  	 */
>  	if (!mfs) {
> -		printf("%s:\t%d
> sectors in %d %s of %d tracks, %d sectors\n",
> -		    fsys, sblock.fs_size *
> NSPF(&sblock), sblock.fs_ncyl,
> -		    "cylinders", sblock.fs_ntrak,
> sblock.fs_nsect);
> +		printf("%s:\t%d sectors in %d cylinders"
> +		       "
> of %d tracks, %d sectors\n",
> +		    fsys, sblock.fs_size * (sblock.fs_fsize
> / sectorsize),
> +		    sblock.fs_ncyl, sblock.fs_ntrak, sblock.fs_nsect);
>
> #define	B2MBFACTOR (1 / (1024.0 * 1024.0))
>  		printf("\t%.1fMB in %d cyl
> groups (%d c/g, %.2fMB/g, %d i/g)\n",
>  		    (float)sblock.fs_size *
> sblock.fs_fsize * B2MBFACTOR,
> @@ -627,7 +649,7 @@
>  	memcpy(writebuf,
> &sblock, sbsize);
>  	if (needswap)
>  		ffs_sb_swap(&sblock, (struct
> fs*)writebuf);
> -	wtfs((int)SBOFF / sectorsize, sbsize, writebuf);
> +
> wtfs((int)SBLOCK, sbsize, writebuf);
>  	/*
>  	 * Write out the duplicate
> super blocks
>  	 */
> @@ -1031,7 +1053,7 @@
>  	ipg = 0;
>  	for (i = 0; i < 10;
> i++) {
>  		usedb = (sblock.fs_iblkno + ipg / INOPF(&sblock))
> -			*
> NSPF(&sblock) * (off_t)sectorsize;
> +			* NSPF(&sblock) * (off_t)DEV_BSIZE;
>
> 		new_ipg = (cylpg * (long long)bpcg - usedb) /
>  		    (long long)density *
> fssize / (ncg * secpercyl * cylpg);
>  		if (new_ipg <= 0)
> @@ -1101,12
> +1123,12 @@
>
>  #ifdef MFS
>  	if (mfs) {
> -		memmove(bf, membase + bno *
> sectorsize, size);
> +		memmove(bf, membase + bno * DEV_BSIZE, size);
>
> return;
>  	}
>  #endif
>  	offset = bno;
> -	offset *= sectorsize;
> +	offset *=
> DEV_BSIZE;
>  	if (lseek(fsi, offset, SEEK_SET) < 0) {
>  		printf("rdfs: seek
> error for sector %d: %s\n",
>  		    bno, strerror(errno));
> @@ -1131,14
> +1153,14 @@
>
>  #ifdef MFS
>  	if (mfs) {
> -		memmove(membase + bno *
> sectorsize, bf, size);
> +		memmove(membase + bno * DEV_BSIZE, bf, size);
>
> return;
>  	}
>  #endif
>  	if (Nflag)
>  		return;
>  	offset = bno;
> -	offset *=
> sectorsize;
> +	offset *= DEV_BSIZE;
>  	if (lseek(fso, offset, SEEK_SET) < 0)
> {
>  		printf("wtfs: seek error for sector %d: %s\n",
>  		    bno,
> strerror(errno));
> diff -u /usr/src/sbin/newfs/newfs.8
> /home/trevin/src/sbin/newfs/newfs.8
> --- /usr/src/sbin/newfs/newfs.8	Tue Apr
>  9 04:22:20 2002
> +++ /home/trevin/src/sbin/newfs/newfs.8	Sat Jun 01
> 19:38:23 2002
> @@ -149,9 +149,9 @@
>  .Ar block-size
>  .It \&\*[Lt] 20 MB
>  4
> KB
> -.It \&\*[Lt] 1024 MB
> +.It \&\*[Lt] 1000 MB
>  8 KB
> -.It \&\*[Gt]\&= 1024
> MB
> +.It \&\*[Gt]\&= 1000 MB
>  16 KB
>  .El
>  .It Fl c Ar cpg
> @@ -179,7 +179,7
> @@
>  The fragment size of the file system in bytes.
>  It must be a power of
> two ranging in value between
>  .Ar block-size Ns /8
> -and
> +(or the sector
> size, whichever is greater) and
>  .Ar block-size .
>  The optimal
>  .Ar
> block-size Ns : Ns Ar frag-size
> @@ -193,9 +193,9 @@
>  .Ar frag-size
>  .It
> \&\*[Lt] 20 MB
>  0.5 KB
> -.It \&\*[Lt] 1024 MB
> +.It \&\*[Lt] 1000 MB
>  1
> KB
> -.It \&\*[Gt]\&= 1024 MB
> +.It \&\*[Gt]\&= 1000 MB
>  2 KB
>  .El
>  .It Fl g
> Ar avgfilesize
> @@ -215,9 +215,9 @@
>  .Ar bytes-per-inode
>  .It \&\*[Lt] 20
> MB
>  2 KB
> -.It \&\*[Lt] 1024 MB
> +.It \&\*[Lt] 1000 MB
>  4 KB
> -.It \&\*[Gt]\&=
> 1024 MB
> +.It \&\*[Gt]\&= 1000 MB
>  8 KB
>  .El
>  .It Fl m Ar free-space
> @@
> -245,7 +245,7 @@
>  .Xr tunefs 8
>  for more details on how to set this
> option.
>  .It Fl s Ar size
> -The size of the file system in sectors.
> +The
> size of the file system.
>  An
>  .Sq s
>  suffix will be interpreted as the
> number of sectors (the default).
> @@ -269,8 +269,8 @@
>  to find the
> alternative superblocks if the standard superblock is lost.
>  .Bl -tag
> -width Fl
>  .It Fl S Ar sector-size
> -The size of a sector in bytes (almost
> never anything but 512).
> -Defaults to 512.
> +The size of a sector in
> bytes.
> +Defaults to the sector size read from the disk label.
>  .It Fl k Ar
> skew
>  Sector \&0 skew, per track.
>  Used to describe perturbations in the
> media format to compensate for
> @@ -297,8 +297,8 @@
>  The speed of the disk
> in revolutions per minute.
>  .ne 1i
>  .It Fl t Ar ntracks
> -The number of
> tracks per cylinder available for data allocation by the file
> -system.
> +The
> number of tracks per cylinder (heads) available for data
> +allocation by the
> file system.
>  .It Fl u Ar nsectors
>  The number of sectors per track
> available for data allocation by the file
>  system.
> diff -u
> /usr/src/sbin/newfs/newfs.c /home/trevin/src/sbin/newfs/newfs.c
> ---
> /usr/src/sbin/newfs/newfs.c	Wed Feb 20 04:01:03 2002
> +++
> /home/trevin/src/sbin/newfs/newfs.c	Sat Jun  1 20:42:25 2002
> @@ -110,12
> +110,12 @@
>  /*
>   * For file systems smaller than SMALL_FSSIZE we use the
> S_DFL_* defaults,
>   * otherwise if less than MEDIUM_FSSIZE use M_DFL_*,
> otherwise use
> - * L_DFL_*.
> + * L_DFL_*.  SMALL_FSSIZE and MEDIUM_FSSIZE are
> in DEV_BSIZE units.
>   */
> -#define	SMALL_FSSIZE	(20*1024*2)
> +#define
> SMALL_FSSIZE	(20*1024*1024/DEV_BSIZE)
>  #define	S_DFL_FRAGSIZE	512
>  #define
> S_DFL_BLKSIZE	4096
> -#define	MEDIUM_FSSIZE	(1000*1024*2)
> +#define
> MEDIUM_FSSIZE	(1000*1024*1024/DEV_BSIZE)
>  #define	M_DFL_FRAGSIZE	1024
>
> #define	M_DFL_BLKSIZE	8192
>  #define	L_DFL_FRAGSIZE	2048
> @@ -171,7 +171,7
> @@
>  int	mfs;			/* run as the memory based filesystem */
>  int	Nflag;			/*
> run without writing file system */
>  int	Oflag;			/* format as an 4.3BSD
> file system */
> -int	fssize;			/* file system size */
> +int	fssize;			/* file
> system size / DEV_BSIZE */
>  int	ntracks;		/* # tracks/cylinder */
>  int
> nsectors;		/* # sectors/track */
>  int	nphyssectors;		/* # sectors/track
> including spares */
> @@ -278,6 +278,10 @@
>  		case 'S':
>  			sectorsize =
> strsuftoi("sector size",
>  			    optarg, 1, INT_MAX);
> +			if (sectorsize %
> DEV_BSIZE)
> +				errx(1,
> +			    "sector size must be a multiple of %d
> bytes.",
> +				    DEV_BSIZE);
>  			break;
>  #ifdef COMPAT
>  		case 'T':
> @@
> -378,21 +382,18 @@
>  			if (endp[0] != '\0' && endp[1] != '\0')
>  				llsize
> = -1;
>  			else {
> -				int	ssiz;
> -
> -				ssiz = (sectorsize ? sectorsize :
> DFL_SECSIZE);
>  				switch (tolower((unsigned char)endp[0])) {
>  				case
> 'b':
> -					llsize /= ssiz;
> +					llsize /= DEV_BSIZE;
>  					break;
>
> case 'k':
> -					llsize *= 1024 / ssiz;
> +					llsize = llsize * 1024 /
> DEV_BSIZE;
>  					break;
>  				case 'm':
> -					llsize *= 1024 * 1024 /
> ssiz;
> +					llsize *= 1024 * 1024 / DEV_BSIZE;
>  					break;
>  				case
> 'g':
> -					llsize *= 1024 * 1024 * 1024 / ssiz;
> +					llsize *= 1024 * 1024
> * 1024 / DEV_BSIZE;
>  					break;
>  				case '\0':
>  				case 's':
> @@ -446,6
> +447,8 @@
>  			sectorsize = DFL_SECSIZE;
>
>  		if (Fflag && !Nflag) {	/*
> creating image in a regular file */
> +			if (fssize % (sectorsize /
> DEV_BSIZE))
> +				fssize &= ~(sectorsize / DEV_BSIZE - 1);
>  			if (fssize ==
> 0)
>  				errx(1, "need to specify size when using -F");
>  			fso =
> open(special, O_RDWR | O_CREAT | O_TRUNC, 0777);
> @@ -454,7 +457,7 @@
>  			if
> ((fsi = dup(fso)) == -1)
>  				err(1, "can't dup(2) image fd");
>  		/*
> XXXLUKEM: only ftruncate() regular files ? */
> -			if (ftruncate(fso,
> (off_t)fssize * sectorsize) == -1)
> +			if (ftruncate(fso, (off_t)fssize *
> DEV_BSIZE) == -1)
>  				err(1, "can't resize %s to %d",
>  				    special,
> fssize);
>
> @@ -473,7 +476,7 @@
>  				if ((buf = calloc(1, bufsize)) ==
> NULL)
>  					err(1, "can't malloc buffer of %d",
>  					bufsize);
> -				bufrem
> = fssize * sectorsize;
> +				bufrem = fssize * DEV_BSIZE;
>  				printf(
>
> "Creating file system image in `%s', size %lld bytes, in %d byte
> chunks.\n",
>  				    special, (long long)bufrem, bufsize);
> @@ -562,11
> +565,6 @@
>  			errx(1, "`%c' partition type is not `4.2BSD'", *cp);
>  	}	/*
> !Fflag && !mfs */
>
> -	if (fssize == 0)
> -		fssize = pp->p_size;
> -	if (fssize
> > pp->p_size && !mfs && !Fflag)
> -		errx(1, "maximum file system size on the
> `%c' partition is %d",
> -		    *cp, pp->p_size);
>  	if (rpm == 0) {
>  		rpm =
> lp->d_rpm;
>  		if (rpm <= 0)
> @@ -587,6 +585,13 @@
>  		if (sectorsize <= 0)
>
> 	errx(1, "no default sector size");
>  	}
> +	if (fssize == 0)
> +		fssize =
> pp->p_size * (sectorsize / DEV_BSIZE);
> +	if (fssize % (sectorsize /
> DEV_BSIZE))
> +		fssize &= ~(sectorsize / DEV_BSIZE - 1);
> +	if ((fssize >
> pp->p_size * (sectorsize / DEV_BSIZE)) && !mfs && !Fflag)
> +		errx(1,
> "maximum file system size on the `%c' partition is %d",
> +		    *cp,
> pp->p_size);
>  	if (trackskew == -1) {
>  		trackskew = lp->d_trackskew;
>  		if
> (trackskew < 0)
> @@ -727,7 +732,7 @@
>  		else
>  			args.export.ex_flags = 0;
>
> 	args.base = membase;
> -		args.size = fssize * sectorsize;
> +		args.size =
> fssize * DEV_BSIZE;
>  		if (mount(MOUNT_MFS, argv[1], mntflags, &args) < 0)
>
> 			exit(errno); /* parent prints message */
>  	}
> diff -u
> /usr/src/sys/arch/i386/i386/disksubr.c
> /home/trevin/src/sys/arch/i386/i386/disksubr.c
> ---
> /usr/src/sys/arch/i386/i386/disksubr.c	Wed Feb 20 04:10:35 2002
> +++
> /home/trevin/src/sys/arch/i386/i386/disksubr.c	Fri May 31 12:31:00 2002
> @@
> -140,6 +140,7 @@
>  	struct disklabel *dlp;
>  	char *msg = NULL;
>  	int
> dospartoff, cyl, i, *ip;
> +	int sectodbs = lp->d_secsize / DEV_BSIZE;
>
>  	/*
> minimal requirements for archtypal disk label */
>  	if (lp->d_secsize ==
> 0)
> @@ -170,13 +171,13 @@
>
>  	/* do dos partitions in the process of getting
> disklabel? */
>  	dospartoff = 0;
> -	cyl = LABELSECTOR / lp->d_secpercyl;
> +
> cyl = LABELSECTOR / (lp->d_secpercyl * sectodbs);
>  	if (!osdep)
>  		goto
> nombrpart;
>  	dp = osdep->dosparts;
>
>  	/* read master boot record */
> -
> bp->b_blkno = MBR_BBSECTOR;
> +	bp->b_blkno = MBR_BBSECTOR * sectodbs;
>
> bp->b_bcount = lp->d_secsize;
>  	bp->b_flags |= B_READ;
>  	bp->b_cylinder =
> MBR_BBSECTOR / lp->d_secpercyl;
> @@ -244,7 +245,7 @@
>
>  nombrpart:
>  	/*
> next, dig out disk label */
> -	bp->b_blkno = dospartoff + LABELSECTOR;
> +
> bp->b_blkno = dospartoff * sectodbs + LABELSECTOR;
>  	bp->b_cylinder = cyl;
>
> 	bp->b_bcount = lp->d_secsize;
>  	bp->b_flags &= ~(B_DONE);
> @@ -388,6 +389,7
> @@
>  	struct buf *bp;
>  	struct disklabel *dlp;
>  	int error, dospartoff,
> cyl;
> +	int sectodbs = lp->d_secsize / DEV_BSIZE;
>
>  	/* get a buffer and
> initialize it */
>  	bp = geteblk((int)lp->d_secsize);
> @@ -395,13 +397,13 @@
>
>
>  	/* do dos partitions in the process of getting disklabel? */
>
> dospartoff = 0;
> -	cyl = LABELSECTOR / lp->d_secpercyl;
> +	cyl = LABELSECTOR
> / (lp->d_secpercyl * sectodbs);
>  	if (!osdep)
>  		goto nombrpart;
>  	dp =
> osdep->dosparts;
>
>  	/* read master boot record */
> -	bp->b_blkno =
> MBR_BBSECTOR;
> +	bp->b_blkno = MBR_BBSECTOR * sectodbs;
>  	bp->b_bcount =
> lp->d_secsize;
>  	bp->b_flags |= B_READ;
>  	bp->b_cylinder = MBR_BBSECTOR /
> lp->d_secpercyl;
> @@ -433,7 +435,7 @@
>  #endif
>
>  	/* next, dig out disk
> label */
> -	bp->b_blkno = dospartoff + LABELSECTOR;
> +	bp->b_blkno =
> dospartoff * sectodbs + LABELSECTOR;
>  	bp->b_cylinder = cyl;
>  	bp->b_bcount
> = lp->d_secsize;
>  	bp->b_flags &= ~(B_DONE);
> @@ -475,13 +477,14 @@
>  	int
> wlabel;
>  {
>  	struct partition *p = lp->d_partitions +
> DISKPART(bp->b_dev);
> -	int labelsector = lp->d_partitions[2].p_offset +
> LABELSECTOR;
> +	int sectodbs = lp->d_secsize / DEV_BSIZE;
> +	int labelblock =
> lp->d_partitions[2].p_offset * sectodbs + LABELSECTOR;
>  	int sz;
>
> -	sz =
> howmany(bp->b_bcount, lp->d_secsize);
> +	sz = howmany(bp->b_bcount,
> DEV_BSIZE);
>
> -	if (bp->b_blkno + sz > p->p_size) {
> -		sz = p->p_size -
> bp->b_blkno;
> +	if (bp->b_blkno + sz > p->p_size * sectodbs) {
> +		sz =
> p->p_size * sectodbs - bp->b_blkno;
>  		if (sz == 0) {
>  			/* If exactly at
> end of disk, return EOF. */
>  			bp->b_resid = bp->b_bcount;
> @@ -497,9
> +500,9 @@
>  	}
>
>  	/* Overwriting disk label? */
> -	if (bp->b_blkno +
> p->p_offset <= labelsector &&
> +	if (bp->b_blkno + p->p_offset * sectodbs <=
> labelblock &&
>  #if LABELSECTOR != 0
> -	    bp->b_blkno + p->p_offset + sz >
> labelsector &&
> +	    bp->b_blkno + p->p_offset * sectodbs + sz > labelblock
> &&
>  #endif
>  	    (bp->b_flags & B_READ) == 0 && !wlabel) {
>  		bp->b_error =
> EROFS;
> @@ -507,8 +510,8 @@
>  	}
>
>  	/* calculate cylinder for disksort to
> order transfers with */
> -	bp->b_cylinder = (bp->b_blkno + p->p_offset) /
> -
>    (lp->d_secsize / DEV_BSIZE) / lp->d_secpercyl;
> +	bp->b_cylinder =
> (bp->b_blkno / sectodbs + p->p_offset)
> +		/ lp->d_secpercyl;
>  	return (1);
>
>
>  bad:
> diff -u /usr/src/sys/ufs/ffs/ffs_vfsops.c
> /home/trevin/src/sys/ufs/ffs/ffs_vfsops.c
> ---
> /usr/src/sys/ufs/ffs/ffs_vfsops.c	Wed Apr 10 04:38:14 2002
> +++
> /home/trevin/src/sys/ufs/ffs/ffs_vfsops.c	Sat Jun 01 18:28:55 2002
> @@
> -441,7 +441,6 @@
>  	void *space;
>  	struct buf *bp;
>  	struct fs *fs,
> *newfs;
> -	struct partinfo dpart;
>  	int i, blks, size, error;
>  	int32_t
> *lp;
>  	caddr_t cp;
> @@ -460,11 +459,8 @@
>  	/*
>  	 * Step 2: re-read
> superblock from disk.
>  	 */
> -	if (VOP_IOCTL(devvp, DIOCGPART,
> (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
> -		size = DEV_BSIZE;
> -	else
> -
> size = dpart.disklab->d_secsize;
> -	error = bread(devvp, (ufs_daddr_t)(SBOFF
> / size), SBSIZE, NOCRED, &bp);
> +	error = bread(devvp, (ufs_daddr_t)(SBOFF /
> DEV_BSIZE), SBSIZE,
> +		      NOCRED, &bp);
>  	if (error) {
>  		brelse(bp);

This one will be correct, as the buffer cache is in terms of DEV_BSIZE.

> return (error);
> @@ -611,7 +607,6 @@
>  	struct buf *bp;
>  	struct fs *fs;
>
> dev_t dev;
> -	struct partinfo dpart;
>  	void *space;
>  	int blks;
>  	int error,
> i, size, ronly;
> @@ -645,14 +640,10 @@
>  	error = VOP_OPEN(devvp, ronly ?
> FREAD : FREAD|FWRITE, FSCRED, p);
>  	if (error)
>  		return (error);
> -	if
> (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
> -		size
> = DEV_BSIZE;
> -	else
> -		size = dpart.disklab->d_secsize;
> -
>  	bp = NULL;
>
> ump = NULL;
> -	error = bread(devvp, (ufs_daddr_t)(SBOFF / size), SBSIZE,
> cred, &bp);
> +	error = bread(devvp, (ufs_daddr_t)(SBOFF / DEV_BSIZE),
> SBSIZE,
> +		      cred, &bp);
>  	if (error)
>  		goto out;
>
> diff -u
> /usr/src/sys/ufs/ffs/fs.h /home/trevin/src/sys/ufs/ffs/fs.h
> ---
> /usr/src/sys/ufs/ffs/fs.h	Thu Apr 11 04:37:24 2002
> +++
> /home/trevin/src/sys/ufs/ffs/fs.h	Sat Jun 01 15:16:23 2002
> @@ -174,12
> +174,12 @@
>  struct fs {
>  	int32_t	 fs_firstfield;		/* historic file system
> linked list, */
>  	int32_t	 fs_unused_1;		/*     used for incore super
> blocks */
> -	ufs_daddr_t fs_sblkno;		/* addr of super-block in filesys */
> -
> ufs_daddr_t fs_cblkno;		/* offset of cyl-block in filesys */
> -	ufs_daddr_t
> fs_iblkno;		/* offset of inode-blocks in filesys */
> -	ufs_daddr_t
> fs_dblkno;		/* offset of first data after cg */
> +	ufs_daddr_t fs_sblkno;
> /* offset of dup super-block in cg */
> +	ufs_daddr_t fs_cblkno;		/* offset
> of cyl-block in cyl grp */
> +	ufs_daddr_t fs_iblkno;		/* offset of
> inode-blocks in cyl grp */
> +	ufs_daddr_t fs_dblkno;		/* offset of first
> data block in cg */
>  	int32_t	 fs_cgoffset;		/* cylinder group offset in
> cylinder */
> -	int32_t	 fs_cgmask;		/* used to calc mod fs_ntrak */
> +
> int32_t	 fs_cgmask;		/* simplified mod fs_ntrak */

I can't quite tell, but I want to make sure you've only changed the
comments; this structure is used on-disk, and if we tweak it we have to
version the FS.

>  	int32_t	 fs_time;		/*
> last time written */
>  	int32_t	 fs_size;		/* number of blocks in fs */
>
> int32_t	 fs_dsize;		/* number of data blocks in fs */
> @@ -223,13 +223,13
> @@
>  /* these fields are derived from the hardware */
>  	int32_t	 fs_ntrak;
> /* tracks per cylinder */
>  	int32_t	 fs_nsect;		/* sectors per track */
> -
> int32_t	 fs_spc;		/* sectors per cylinder */
> +	int32_t	 fs_spc;		/*
> DEV_BSIZE blocks per cylinder */
>  /* this comes from the disk driver
> partitioning */
>  	int32_t	 fs_ncyl;		/* cylinders in file system */
>  /*
> these fields can be computed from the others */
>  	int32_t	 fs_cpg;		/*
> cylinders per group */
>  	int32_t	 fs_ipg;		/* inodes per group */
> -	int32_t
>  fs_fpg;		/* blocks per group * fs_frag */
> +	int32_t	 fs_fpg;		/* frags per
> group */
>  /* this data must be re-computed after crashes */
>  	struct	csum
> fs_cstotal;	/* cylinder summary information */
>  /* these fields are cleared
> at mount time */
> @@ -432,7 +432,7 @@
>
>  /*
>   * Turn file system block
> numbers into disk block addresses.
> - * This maps file system blocks to
> device size blocks.
> + * This maps file system blocks to DEV_BSIZE blocks.
>
> */
>  #define	fsbtodb(fs, b)	((b) << (fs)->fs_fsbtodb)
>  #define	dbtofsb(fs,
> b)	((b) >> (fs)->fs_fsbtodb)
> @@ -440,6 +440,7 @@
>  /*
>   * Cylinder group
> macros to locate things in cylinder groups.
>   * They calc file system
> addresses of cylinder group data structures.
> + * All values are fragment
> numbers within the filesystem.
>   */
>  #define	cgbase(fs, c)
> ((ufs_daddr_t)((fs)->fs_fpg * (c)))
>  #define	cgdmin(fs, c)	(cgstart(fs, c)
> + (fs)->fs_dblkno)	/* 1st data */
> @@ -470,10 +471,12 @@
>
>  /*
>   * Extract
> the bits for a block from a map.
> - * Compute the cylinder and rotational
> position of a cyl block addr.
>   */
>  #define	blkmap(fs, map, loc) \
>
> (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY -
> (fs)->fs_frag)))
> +/*
> + * Compute the cylinder and rotational position of a
> DEV_BSIZE block addr.
> + */
>  #define	cbtocylno(fs, bno) \
>      (fsbtodb(fs,
> bno) / (fs)->fs_spc)
>  #define	cbtorpos(fs, bno) \
> @@ -532,8 +535,7 @@
>
> : (fragroundup(fs, blkoff(fs, (dip)->di_size))))
>
>  /*
> - * Number of disk
> sectors per block/fragment; assumes DEV_BSIZE byte
> - * sector size.
> + *
> Number of DEV_BSIZE units per block/fragment
>   */
>  #define	NSPB(fs)
> ((fs)->fs_nspf << (fs)->fs_fragshift)
>  #define	NSPF(fs)	((fs)->fs_nspf)

Overall, I think it'd be easier to leave the FS code in terms of
underlying sectors, and just be clear when we're looking at a sector
offset or a byte ofset. Then all we have to do is tweak the buffer cache
interface so that it is always in terms of DEV_BSIZE, which might not be
underlying sector size.

Since ffs (AFAICT) reads & writes in fsblock ("fragment") sizes, if we
require fsblocks be at least as big as a disk block (quite reasonable),
then it won't matter for most of the code.

Take care,

Bill