Subject: Explicit enabling of disk caches
To: None <tech-kern@netbsd.org>
From: Jason R Thorpe <thorpej@wasabisystems.com>
List: tech-kern
Date: 09/02/2001 10:31:04
--dTy3Mrz/UPE2dbVg
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Folks...

I've been thinking of ways we can improve I/O performance, and one
obvious way is to provide a mechanism to explicitly manipulate the
read and write caches on disks.

The first step in this is to provide an abstract mechanism to query,
enable/disable, and flush the caches on disks.  I.e. you don't want
to litter knowledge of SCSI throughout the tree, and e.g. RAID controllers
which have caches are likely to have different mechanisms for cache
manipulation.

So, what I'm proposing are 3 new ioctls:

	DIOCGCACHE	get cache enable bits
	DIOCSCACHE	set cache enable bits
	DIOCCACHESYNC	synchronize the cache

DIOC{G,S}CACHE use an `int' argument that has the following bits:

	DKCACHE_READ	read cache enabled
	DKCACHE_WRITE	write(-back) cache enabled

DIOCCACHESYNC uses an `int' argument, treated as a boolean, indicating
whether or not to force the cache flush.  This allows e.g. battery-backed
RAID caches to skip the cache flush unless you tell it to REALLY REALLY
flush it (e.g. you're going to swap out the disk).

I've written a cheezy little dkctl(8) program that uses these ioctls
to do the obvious thing.  This program will need a little work before
it goes into the tree.

But the eventual goal is for the file system to be able to explicitly
use the cache flush operation as a barrier, which would let disks
run with the write-back cache enabled in a safe fashion.  Something like
this:

	write out data -> sync cache -> write out metadata -> sync cache

Side note -- I'm considering a bit in `struct buf', maybe B_WRITETHROUGH,
which would make it look more like this:

	write out data -> sync cache -> write-through out metadata

..saving a SCSI command (SCSI has a bit in the CDB that can force
write-through on a per-write basis).

Attached are diffs that implement the above ioctls in the SCSI disk
driver.  If we decide these are a good idea, then I'll add stubs in
the other disk drivers as well (at least for the DIOCCACHESYNC ioctl).

-- 
        -- Jason R. Thorpe <thorpej@wasabisystems.com>

--dTy3Mrz/UPE2dbVg
Content-Type: text/plain; charset=us-ascii
Content-Description: dkcache.diffs
Content-Disposition: attachment; filename=foo

Index: sys/dkio.h
===================================================================
RCS file: /cvsroot/syssrc/sys/sys/dkio.h,v
retrieving revision 1.5
diff -c -r1.5 dkio.h
*** sys/dkio.h	2001/01/07 17:55:41	1.5
--- sys/dkio.h	2001/09/02 17:12:08
***************
*** 75,78 ****
--- 75,88 ----
  #define	ODIOCGDEFLABEL	_IOR('d', 114, struct olddisklabel)
  #endif
  
+ 		/* disk cache enable/disable */
+ #define	DIOCGCACHE	_IOR('d', 116, int)	/* get cache enables */
+ #define	DIOCSCACHE	_IOW('d', 117, int)	/* set cache enables */
+ 
+ #define	DKCACHE_READ	0x01		/* read cache enabled */
+ #define	DKCACHE_WRITE	0x02		/* write(back) cache enabled */
+ 
+ 		/* sync disk cache */
+ #define	DIOCCACHESYNC	_IOW('d', 118, int)	/* sync cache (force?) */
+ 
  #endif /* _SYS_DKIO_H_ */
Index: dev/scsipi/sd.c
===================================================================
RCS file: /cvsroot/syssrc/sys/dev/scsipi/sd.c,v
retrieving revision 1.178
diff -c -r1.178 sd.c
*** dev/scsipi/sd.c	2001/07/18 18:21:05	1.178
--- dev/scsipi/sd.c	2001/09/02 17:12:09
***************
*** 912,917 ****
--- 912,919 ----
  		case DIOCLOCK:
  		case DIOCEJECT:
  		case ODIOCEJECT:
+ 		case DIOCGCACHE:
+ 		case DIOCSCACHE:
  		case SCIOCIDENTIFY:
  		case OSCIOCIDENTIFY:
  		case SCIOCCOMMAND:
***************
*** 1047,1052 ****
--- 1049,1089 ----
  		memcpy(addr, &newlabel, sizeof (struct olddisklabel));
  		return (0);
  #endif
+ 
+ 	case DIOCGCACHE:
+ 		if (sd->sc_ops->sdo_getcache != NULL)
+ 			return ((*sd->sc_ops->sdo_getcache)(sd, (int *) addr));
+ 
+ 		/* Not supported on this device. */
+ 		*(int *) addr = 0;
+ 		return (0);
+ 
+ 	case DIOCSCACHE:
+ 		if ((flag & FWRITE) == 0)
+ 			return (EBADF);
+ 		if (sd->sc_ops->sdo_setcache != NULL)
+ 			return ((*sd->sc_ops->sdo_setcache)(sd, *(int *) addr));
+ 
+ 		/* Not supported on this device. */
+ 		return (EOPNOTSUPP);
+ 
+ 	case DIOCCACHESYNC:
+ 		/*
+ 		 * XXX Do we really need to care about having a writeable
+ 		 * file descriptor here?
+ 		 */
+ 		if ((flag & FWRITE) == 0)
+ 			return (EBADF);
+ 		if (((sd->flags & SDF_DIRTY) != 0 || *(int *)addr != 0) &&
+ 		    sd->sc_ops->sdo_flush != NULL) {
+ 			error = (*sd->sc_ops->sdo_flush)(sd, 0);
+ 			if (error)
+ 				sd->flags &= ~SDF_FLUSHING;
+ 			else
+ 				sd->flags &= ~(SDF_FLUSHING|SDF_DIRTY);
+ 		} else
+ 			error = 0;
+ 		return (error);
  
  	default:
  		if (part != RAW_PART)
Index: dev/scsipi/sd_scsi.c
===================================================================
RCS file: /cvsroot/syssrc/sys/dev/scsipi/sd_scsi.c,v
retrieving revision 1.19
diff -c -r1.19 sd_scsi.c
*** dev/scsipi/sd_scsi.c	2001/08/31 07:09:42	1.19
--- dev/scsipi/sd_scsi.c	2001/09/02 17:12:09
***************
*** 62,67 ****
--- 62,68 ----
  #include <sys/errno.h>
  #include <sys/device.h>
  #include <sys/disk.h>
+ #include <sys/dkio.h>
  
  #include <dev/scsipi/scsipi_all.h>
  #include <dev/scsipi/scsi_all.h>
***************
*** 102,111 ****
--- 103,116 ----
  static int	sd_scsibus_get_optparms __P((struct sd_softc *,
  		    struct disk_parms *, int));
  static int	sd_scsibus_flush __P((struct sd_softc *, int));
+ static int	sd_scsibus_getcache __P((struct sd_softc *, int *));
+ static int	sd_scsibus_setcache __P((struct sd_softc *, int));
  
  const struct sd_ops sd_scsibus_ops = {
  	sd_scsibus_get_parms,
  	sd_scsibus_flush,
+ 	sd_scsibus_getcache,
+ 	sd_scsibus_setcache,
  };
  
  int
***************
*** 369,372 ****
--- 374,450 ----
  		       flags|XS_CTL_IGNORE_ILLEGAL_REQUEST));
  	} else
  		return(0);
+ }
+ 
+ /* Report which on-disk caches are enabled, as DKCACHE_* bits in *bitsp. */
+ int
+ sd_scsibus_getcache(sd, bitsp)
+ 	struct sd_softc *sd;
+ 	int *bitsp;
+ {
+ 	struct sd_scsibus_mode_sense_data modebuf;
+ 	int enabled, rv;
+ 
+ 	/* Devices reporting a version below 2 are not queried at all. */
+ 	if (sd->sc_periph->periph_version < 2)
+ 		return (EOPNOTSUPP);
+ 
+ 	rv = sd_scsibus_mode_sense(sd, &modebuf, 8, 0);
+ 	if (rv != 0)
+ 		return (rv);
+ 
+ 	/* RCD is a disable bit: the read cache is on when it is clear. */
+ 	enabled = (modebuf.pages.caching_params.flags & CACHING_RCD) ?
+ 	    0 : DKCACHE_READ;
+ 	if ((modebuf.pages.caching_params.flags & CACHING_WCE) != 0)
+ 		enabled |= DKCACHE_WRITE;
+ 	*bitsp = enabled;
+ 	return (0);
+ }
+ 
+ /* Set the device's cache enables from the DKCACHE_* bits in `bits'. */
+ int
+ sd_scsibus_setcache(sd, bits)
+ 	struct sd_softc *sd;
+ 	int bits;
+ {
+ 	struct scsipi_periph *periph = sd->sc_periph;
+ 	struct sd_scsibus_mode_sense_data scsipi_sense;
+ 	int error;
+ 	uint8_t cflags;		/* caching page flags, not XS_CTL_* flags */
+ 
+ 	if (periph->periph_version < 2)
+ 		return (EOPNOTSUPP);
+ 
+ 	/* Fetch the current settings so that only RCD/WCE get modified. */
+ 	error = sd_scsibus_mode_sense(sd, &scsipi_sense, 8, 0);
+ 	if (error)
+ 		return (error);
+ 
+ 	cflags = scsipi_sense.pages.caching_params.flags &
+ 	    ~(CACHING_RCD|CACHING_WCE);
+ 	if ((bits & DKCACHE_READ) == 0)
+ 		cflags |= CACHING_RCD;	/* RCD set == read cache disabled */
+ 	if (bits & DKCACHE_WRITE)
+ 		cflags |= CACHING_WCE;
+ 
+ 	/* Skip the mode select when the device is already set this way. */
+ 	if (cflags == scsipi_sense.pages.caching_params.flags)
+ 		return (0);
+ 	scsipi_sense.pages.caching_params.flags = cflags;
+ 
+ 	/* Pass only XS_CTL_* bits as control flags, never the page flags. */
+ 	if ((periph->periph_quirks & PQUIRK_ONLYBIG) &&
+ 	    !(periph->periph_quirks & PQUIRK_NOBIGMODESENSE)) {
+ 		error = scsipi_mode_select_big(periph, 0,
+ 		    (struct scsipi_mode_header_big *)&scsipi_sense.header,
+ 		    sizeof(scsipi_sense), XS_CTL_DATA_ONSTACK,
+ 		    SDRETRIES, 10000);
+ 	} else {
+ 		error = scsipi_mode_select(periph, 0,
+ 		    &scsipi_sense.header, sizeof(scsipi_sense),
+ 		    XS_CTL_DATA_ONSTACK, SDRETRIES, 10000);
+ 	}
+ 
+ 	return (error);
+ }
Index: dev/scsipi/sdvar.h
===================================================================
RCS file: /cvsroot/syssrc/sys/dev/scsipi/sdvar.h,v
retrieving revision 1.15
diff -c -r1.15 sdvar.h
*** dev/scsipi/sdvar.h	2001/05/23 02:16:19	1.15
--- dev/scsipi/sdvar.h	2001/09/02 17:12:10
***************
*** 109,114 ****
--- 109,116 ----
  	int	(*sdo_get_parms) __P((struct sd_softc *, struct disk_parms *,
  		    int));
  	int	(*sdo_flush) __P((struct sd_softc *, int));
+ 	int	(*sdo_getcache) __P((struct sd_softc *, int *));
+ 	int	(*sdo_setcache) __P((struct sd_softc *, int));
  };
  #define	SDGP_RESULT_OK		0	/* paramters obtained */
  #define	SDGP_RESULT_OFFLINE	1	/* no media, or otherwise losing */

--dTy3Mrz/UPE2dbVg--