Subject: Re: 64-bit daddr_t problems with libsa
To: Simon Burge <simonb@wasabisystems.com>
From: Luke Mewburn <lukem@wasabisystems.com>
List: tech-kern
Date: 01/30/2003 12:06:31
--gDGSpKKIBgtShtf+
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Wed, Jan 29, 2003 at 08:39:17PM +1100, Simon Burge wrote:
  | On a number of platforms the size of the bootblocks have reasonably
  | severe size constraints.
  | 
  | With the changes to make daddr_t a 64-bit type, the following bit of
  | code in libsa's ufs.c
  | 
  | 	idx = file_block / fp->f_nindir[level - 1];
  | 	file_block %= fp->f_nindir[level - 1];
  | 
  | ends up generating library calls to 64bit division and modulus library
  | functions, and at least on alpha, pmax and sbmips we blow our space
  | budget.
  | 
  | It turns out that the code above is only used if we hit the double
  | indirect block, so by conditionally leaving out support for the double-
  | and triple-indirect blocks we can trim out enough code to fit back in
  | the space restrictions on at least alpha (see patch below).  I have a
  | similar change for lfs.c too along the same lines.  Suggestions for a
  | better name would be appreciated.  :-)

Here's what I've calculated the "maximum" file size can be for a file
that only has direct & single indirect blocks:

	blocksize	direct		single
	---------	------		------
	4096		49152		4243456
	8192		98304		16875520
	16384		196608		67305472

(program that calculated this attached below.)

With this change, you will only be able to load a kernel of "just over"
4MB from a file system with 4KB blocks...

How much space do the 64bit division & modulus functions occupy?

Is this change (LIBSA_UFS_SMALL_FILE) still necessary given your daddr_t
override below?



  | On pmax however, we're still 40 bytes over...
  | 
  | To get back extra space, Matt Green came up with the following patch
  | for <sys/types.h> 
  | 
  | +#ifndef daddr_t
  |  typedef	int64_t		daddr_t;	/* disk address */
  | +#endif
  | +
  | 
  | and then adding
  | 
  | 	CPPFLAGS+=      -Ddaddr_t='unsigned int'
  | 
  | to any Makefiles where you need a smaller ufs.o or lfs.o.
  | 
  | 
  | I'd like to add both of these changes, but not enable the
  | LIBSA_UFS_SMALL_FILE changes anywhere just yet.  The overriding of
  | daddr_t gets enough back for now, but the LIBSA_UFS_SMALL_FILE changes
  | may buy us space that we need down the track.
  | 
  | Comments?

I'd prefer "uint32_t" over "unsigned int" (let's be obvious here ;-)

If the daddr_t override alone gets the size back down, I'd prefer to keep
the double/triple indirect support.


  | 
  | Simon.
  | --
  | Simon Burge                                   <simonb@wasabisystems.com>
  | NetBSD Development, Support and Service:   http://www.wasabisystems.com/
  | 
  | 
  | Index: ufs.c
  | ===================================================================
  | RCS file: /cvsroot/src/sys/lib/libsa/ufs.c,v
  | retrieving revision 1.31
  | diff -d -p -u -r1.31 ufs.c
  | --- ufs.c	2003/01/24 21:55:18	1.31
  | +++ ufs.c	2003/01/29 07:16:15
  | @@ -106,9 +106,11 @@ struct file {
  |  	off_t		f_seekp;	/* seek pointer */
  |  	struct fs	*f_fs;		/* pointer to super-block */
  |  	struct dinode	f_di;		/* copy of on-disk inode */
  | +#ifndef LIBSA_UFS_SMALL_FILE
  |  	unsigned int	f_nindir[NIADDR];
  |  					/* number of blocks mapped by
  |  					   indirect block at level i */
  | +#endif
  |  	char		*f_blk[NIADDR];	/* buffer for indirect block at
  |  					   level i */
  |  	size_t		f_blksize[NIADDR];
  | @@ -236,6 +238,7 @@ block_map(f, file_block, disk_block_p)
  |  	 * nindir[2] = NINDIR**3
  |  	 *	etc
  |  	 */
  | +#ifndef LIBSA_UFS_SMALL_FILE
  |  	for (level = 0; level < NIADDR; level++) {
  |  		if (file_block < fp->f_nindir[level])
  |  			break;
  | @@ -245,6 +248,9 @@ block_map(f, file_block, disk_block_p)
  |  		/* Block number too high */
  |  		return (EFBIG);
  |  	}
  | +#else
  | +	level = 0;	/* XXX - check NINDIR here and return EFBIG? */
  | +#endif
  |  
  |  	ind_block_num = fp->f_di.di_ib[level];
  |  
  | @@ -276,10 +282,12 @@ block_map(f, file_block, disk_block_p)
  |  		/* XXX ondisk32 */
  |  		ind_p = (int32_t *)fp->f_blk[level];
  |  
  | +#ifndef LIBSA_UFS_SMALL_FILE
  |  		if (level > 0) {
  |  			idx = file_block / fp->f_nindir[level - 1];
  |  			file_block %= fp->f_nindir[level - 1];
  |  		} else
  | +#endif
  |  			idx = file_block;
  |  
  |  		ind_block_num = ind_p[idx];
  | @@ -454,6 +462,7 @@ ufs_open(path, f)
  |  	ffs_oldfscompat(fs);
  |  #endif
  |  
  | +#ifndef LIBSA_UFS_SMALL_FILE
  |  	/*
  |  	 * Calculate indirect block levels.
  |  	 */
  | @@ -467,6 +476,7 @@ ufs_open(path, f)
  |  			fp->f_nindir[level] = mult;
  |  		}
  |  	}
  | +#endif
  |  
  |  	inumber = ROOTINO;
  |  	if ((rc = read_inode(inumber, f)) != 0)


--gDGSpKKIBgtShtf+
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="testindir.c"

#include <sys/param.h>
#include <ufs/ufs/dinode.h>
#include <ufs/ffs/fs.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>


/*
 * testindir: for the block size given as the only argument, print the
 * cumulative maximum file size reachable through the direct blocks and
 * then through each successive level of indirect block.
 *
 * Exits 0 on success; exits 1 on a usage error, an unusable block size,
 * or if a size no longer fits in a long long.
 */
int
main(int argc, char *argv[])
{
	int blocksize, i;
	long long m, maxsize, nindir;
	long val;
	char *end;

/* Number of ufs_daddr_t entries that fit in x bytes. */
#define NUMINDIR(x)	((x) / sizeof(ufs_daddr_t))

	if (argc != 2) {
		fprintf(stderr, "usage: %s blocksize\n", argv[0]);
		exit(1);
	}

	/*
	 * Use strtol() rather than atoi() so that trailing junk, empty
	 * input, and out-of-range values are rejected instead of being
	 * silently turned into a bogus block size.
	 */
	errno = 0;
	val = strtol(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0' || errno == ERANGE ||
	    val <= 0 || val > INT_MAX) {
		fprintf(stderr, "%s: invalid blocksize '%s'\n",
		    argv[0], argv[1]);
		exit(1);
	}
	blocksize = (int)val;

	/* Widen before multiplying: int * int could overflow. */
	maxsize = (long long)blocksize * NDADDR;
	printf("direct: blocksize %d, maxsize %lld\n", blocksize, maxsize);

	/*
	 * Do the per-level arithmetic entirely in long long so that the
	 * size_t result of NUMINDIR() does not drag the multiplication
	 * into unsigned arithmetic.
	 */
	nindir = (long long)NUMINDIR((size_t)blocksize);
	for (m = blocksize, i = 1; i <= NIADDR; i++) {
		/* Refuse to continue rather than print a wrapped value. */
		if ((nindir != 0 && m > LLONG_MAX / nindir) ||
		    maxsize > LLONG_MAX - m * nindir) {
			fprintf(stderr, "%s: size overflow at level %d\n",
			    argv[0], i);
			exit(1);
		}
		m *= nindir;
		maxsize += m;
		printf("i %d m %lld maxsize %lld\n", i, m, maxsize);
	}
	exit(0);
}

--gDGSpKKIBgtShtf+--