Subject: lseek() extension for sparse files
To: None <tech-kern@NetBSD.org>
From: Reinoud Zandijk <reinoud@netbsd.org>
List: tech-kern
Date: 09/21/2006 03:44:20
--GvXjxJ+pjyke8COw
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Dear folks,

Attached is a patch that implements the lseek() extension for sparse files
as can be found in Solaris 10. Linux apparently has already
adopted/implemented it as well.

It adds the SEEK_DATA and SEEK_HOLE `whence' arguments to lseek(). For a
more detailed look, see the Solaris 10 man page:

http://tinyurl.com/fk6we

Although it's pretty complete and smoothed out, it might need some fine
tuning. Note that in this patch no file system implements sparse area
reporting yet; the genfs implementation, which almost all file systems use,
implements the basic functionality.

If it's OK, I'd like to commit it.

With regards,
Reinoud


--GvXjxJ+pjyke8COw
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=DIFFS

Index: include/stdio.h
===================================================================
RCS file: /cvsroot/src/include/stdio.h,v
retrieving revision 1.64
diff -u -p -r1.64 stdio.h
--- include/stdio.h	10 May 2006 21:09:45 -0000	1.64
+++ include/stdio.h	21 Sep 2006 01:04:02 -0000
@@ -203,6 +203,12 @@ __END_DECLS
 #ifndef SEEK_END
 #define	SEEK_END	2	/* set file offset to EOF plus offset */
 #endif
+#ifndef SEEK_DATA
+#define SEEK_DATA	3	/* Set file pointer to next data past offset */
+#endif
+#ifndef SEEK_HOLE
+#define SEEK_HOLE	4	/* Set file pointer to next hole past offset */
+#endif
 
 #define	stdin	(&__sF[0])
 #define	stdout	(&__sF[1])
Index: lib/libc/sys/lseek.2
===================================================================
RCS file: /cvsroot/src/lib/libc/sys/lseek.2,v
retrieving revision 1.22
diff -u -p -r1.22 lseek.2
--- lib/libc/sys/lseek.2	13 May 2004 10:20:58 -0000	1.22
+++ lib/libc/sys/lseek.2	21 Sep 2006 01:04:05 -0000
@@ -29,7 +29,7 @@
 .\"
 .\"     @(#)lseek.2	8.3 (Berkeley) 4/19/94
 .\"
-.Dd April 19, 1994
+.Dd September 21, 2006
 .Dt LSEEK 2
 .Os
 .Sh NAME
@@ -86,6 +86,29 @@ the offset is set to the size of the
 file plus
 .Fa offset
 bytes.
+.It
+If
+.Fa whence
+is
+.Dv SEEK_DATA ,
+the offset is set to the next non-hole region whose file offset is
+greater than or equal to the provided
+.Fa offset
+in bytes. If specifying an
+.Fa offset
+of 0 bytes, there is guaranteed to be a data region for easy programming.
+.It
+If
+.Fa whence
+is
+.Dv SEEK_HOLE ,
+the offset is set to the next hole region whose file offset is
+greater than or equal to the provided
+.Fa offset
+in bytes. If specifying an
+.Fa offset
+within the boundaries of the file, there is a guaranteed virtual hole at the
+end of the file for easy programming.
 .El
 .Pp
 The
@@ -121,6 +144,9 @@ is associated with a pipe, socket, or FI
 .It Bq Er EINVAL
 .Fa whence
 is not a proper value, or the resulting file offset would be invalid.
+.It Bq Er ENXIO
+No more data regions or hole regions are present past the supplied
+.Fa offset .
 .El
 .Sh SEE ALSO
 .Xr dup 2 ,
@@ -129,8 +155,18 @@ is not a proper value, or the resulting 
 The
 .Fn lseek
 function conforms to
-.St -p1003.1-90 .
+.St -p1003.1-90 . The
+.Dv SEEK_DATA
+and
+.Dv SEEK_HOLE
+conform to the Solaris 10 implementation.
 .Sh BUGS
 This document's use of
 .Fa whence
 is incorrect English, but is maintained for historical reasons.
+.Sh HISTORY
+The
+.Dv SEEK_DATA
+and
+.Dv SEEK_HOLE
+functionality was added in NetBSD 5.0.
Index: sys/fs/union/union_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/fs/union/union_vnops.c,v
retrieving revision 1.16
diff -u -p -r1.16 union_vnops.c
--- sys/fs/union/union_vnops.c	14 May 2006 21:31:52 -0000	1.16
+++ sys/fs/union/union_vnops.c	21 Sep 2006 01:04:19 -0000
@@ -1207,9 +1207,12 @@ union_seek(v)
 {
 	struct vop_seek_args /* {
 		struct vnode *a_vp;
-		off_t  a_oldoff;
-		off_t  a_newoff;
+		off_t a_oldoff;
+		int a_whence;
+		off_t a_offset;
+		off_t *a_newoff;
 		kauth_cred_t a_cred;
+		struct lwp *a_l;
 	} */ *ap = v;
 	struct vnode *ovp = OTHERVP(ap->a_vp);
 
Index: sys/kern/vfs_syscalls.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.270
diff -u -p -r1.270 vfs_syscalls.c
--- sys/kern/vfs_syscalls.c	13 Sep 2006 10:07:42 -0000	1.270
+++ sys/kern/vfs_syscalls.c	21 Sep 2006 01:04:19 -0000
@@ -2117,7 +2117,6 @@ sys_lseek(struct lwp *l, void *v, regist
 	struct filedesc *fdp = p->p_fd;
 	struct file *fp;
 	struct vnode *vp;
-	struct vattr vattr;
 	off_t newoff;
 	int error;
 
@@ -2129,31 +2128,14 @@ sys_lseek(struct lwp *l, void *v, regist
 	vp = (struct vnode *)fp->f_data;
 	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
 		error = ESPIPE;
-		goto out;
+	} else {
+		error = VOP_SEEK(vp, fp->f_offset, SCARG(uap, whence),
+		    SCARG(uap, offset), &newoff, cred, l);
 	}
 
-	switch (SCARG(uap, whence)) {
-	case SEEK_CUR:
-		newoff = fp->f_offset + SCARG(uap, offset);
-		break;
-	case SEEK_END:
-		error = VOP_GETATTR(vp, &vattr, cred, l);
-		if (error)
-			goto out;
-		newoff = SCARG(uap, offset) + vattr.va_size;
-		break;
-	case SEEK_SET:
-		newoff = SCARG(uap, offset);
-		break;
-	default:
-		error = EINVAL;
-		goto out;
-	}
-	if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) != 0)
-		goto out;
+	if (error == 0)
+		*(off_t *)retval = fp->f_offset = newoff;
 
-	*(off_t *)retval = fp->f_offset = newoff;
- out:
 	FILE_UNUSE(fp, l);
 	return (error);
 }
@@ -2199,7 +2181,8 @@ sys_pread(struct lwp *l, void *v, regist
 	 * XXX This works because no file systems actually
 	 * XXX take any action on the seek operation.
 	 */
-	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
+	error = VOP_SEEK(vp, fp->f_offset, SEEK_SET, offset, NULL, fp->f_cred, l);
+	if (error != 0)
 		goto out;
 
 	/* dofileread() will unuse the descriptor for us */
@@ -2252,7 +2235,8 @@ sys_preadv(struct lwp *l, void *v, regis
 	 * XXX This works because no file systems actually
 	 * XXX take any action on the seek operation.
 	 */
-	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
+	error = VOP_SEEK(vp, fp->f_offset, SEEK_SET, offset, NULL, fp->f_cred, l);
+	if (error != 0)
 		goto out;
 
 	/* dofilereadv() will unuse the descriptor for us */
@@ -2305,7 +2289,8 @@ sys_pwrite(struct lwp *l, void *v, regis
 	 * XXX This works because no file systems actually
 	 * XXX take any action on the seek operation.
 	 */
-	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
+	error = VOP_SEEK(vp, fp->f_offset, SEEK_SET, offset, NULL, fp->f_cred, l);
+	if (error != 0)
 		goto out;
 
 	/* dofilewrite() will unuse the descriptor for us */
@@ -2358,7 +2343,8 @@ sys_pwritev(struct lwp *l, void *v, regi
 	 * XXX This works because no file systems actually
 	 * XXX take any action on the seek operation.
 	 */
-	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
+	error = VOP_SEEK(vp, fp->f_offset, SEEK_SET, offset, NULL, fp->f_cred, l);
+	if (error != 0)
 		goto out;
 
 	/* dofilewritev() will unuse the descriptor for us */
Index: sys/kern/vnode_if.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vnode_if.c,v
retrieving revision 1.66
diff -u -p -r1.66 vnode_if.c
--- sys/kern/vnode_if.c	13 Jul 2006 12:00:25 -0000	1.66
+++ sys/kern/vnode_if.c	21 Sep 2006 01:04:19 -0000
@@ -742,8 +742,11 @@ const struct vnodeop_desc vop_seek_desc 
 int
 VOP_SEEK(struct vnode *vp,
     off_t oldoff,
-    off_t newoff,
-    kauth_cred_t cred)
+    int whence,
+    off_t offset,
+    off_t *newoff,
+    kauth_cred_t cred,
+    struct lwp *l)
 {
 	struct vop_seek_args a;
 #ifdef VNODE_LOCKDEBUG
@@ -751,8 +754,11 @@ VOP_SEEK(struct vnode *vp,
 	a.a_desc = VDESC(vop_seek);
 	a.a_vp = vp;
 	a.a_oldoff = oldoff;
+	a.a_whence = whence;
+	a.a_offset = offset;
 	a.a_newoff = newoff;
 	a.a_cred = cred;
+	a.a_l = l;
 	return (VCALL(vp, VOFFSET(vop_seek), &a));
 }
 
Index: sys/miscfs/genfs/genfs_vnops.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/genfs/genfs_vnops.c,v
retrieving revision 1.129
diff -u -p -r1.129 genfs_vnops.c
--- sys/miscfs/genfs/genfs_vnops.c	15 Sep 2006 15:51:12 -0000	1.129
+++ sys/miscfs/genfs/genfs_vnops.c	21 Sep 2006 01:04:20 -0000
@@ -50,6 +50,7 @@ __KERNEL_RCSID(0, "$NetBSD: genfs_vnops.
 #include <sys/mman.h>
 #include <sys/file.h>
 #include <sys/kauth.h>
+#include <sys/unistd.h>
 
 #include <miscfs/genfs/genfs.h>
 #include <miscfs/genfs/genfs_node.h>
@@ -91,12 +92,54 @@ genfs_seek(void *v)
 	struct vop_seek_args /* {
 		struct vnode *a_vp;
 		off_t a_oldoff;
-		off_t a_newoff;
-		kauth_cred_t cred;
-	} */ *ap = v;
+		int a_whence;
+		off_t a_offset;
+		off_t *a_newoff;
+		kauth_cred_t a_cred;
+		struct lwp *a_l;
+	}; */ *ap = v;
+	struct vattr vattr;
+	off_t newoff;
+	int error;
+
+	newoff = ap->a_oldoff;
+	switch (ap->a_whence) {
+	case SEEK_CUR:
+		newoff = ap->a_oldoff + ap->a_offset;
+		break;
+	case SEEK_END:
+		error = VOP_GETATTR(ap->a_vp, &vattr, ap->a_cred, ap->a_l);
+		if (error)
+			return error;
+		newoff = ap->a_offset + vattr.va_size;
+		break;
+	case SEEK_SET:
+		newoff = ap->a_offset;
+		break;
+	case SEEK_DATA:
+		/* if at start, there is one data block */
+		if (ap->a_offset != 0)
+			return ENXIO;
+		newoff = 0;
+		break;
+	case SEEK_HOLE:
+		/* there exists one virtual hole at the end of the file */
+		error = VOP_GETATTR(ap->a_vp, &vattr, ap->a_cred, ap->a_l);
+		if (error)
+			return error;
+		if (ap->a_offset > vattr.va_size)
+			return ENXIO;
+		newoff = ap->a_offset;
+		break;
+	default:
+		return EINVAL;
+	}
 
-	if (ap->a_newoff < 0)
-		return (EINVAL);
+	if (newoff < 0)
+		return EINVAL;
+
+	if (ap->a_newoff)
+		*(ap->a_newoff) = newoff;
 
 	return (0);
 }
Index: sys/sys/fcntl.h
===================================================================
RCS file: /cvsroot/src/sys/sys/fcntl.h,v
retrieving revision 1.33
diff -u -p -r1.33 fcntl.h
--- sys/sys/fcntl.h	29 Nov 2005 22:52:02 -0000	1.33
+++ sys/sys/fcntl.h	21 Sep 2006 01:04:20 -0000
@@ -256,6 +256,12 @@ struct flock {
 #ifndef	SEEK_END
 #define	SEEK_END	2	/* set file offset to EOF plus offset */
 #endif
+#ifndef SEEK_DATA
+#define SEEK_DATA	3	/* Set file pointer to next data past offset */
+#endif
+#ifndef SEEK_HOLE
+#define SEEK_HOLE	4	/* Set file pointer to next hole past offset */
+#endif
 
 /*
  * posix_advise advisories.
Index: sys/sys/unistd.h
===================================================================
RCS file: /cvsroot/src/sys/sys/unistd.h,v
retrieving revision 1.35
diff -u -p -r1.35 unistd.h
--- sys/sys/unistd.h	14 Aug 2006 18:17:48 -0000	1.35
+++ sys/sys/unistd.h	21 Sep 2006 01:04:20 -0000
@@ -108,6 +108,8 @@
 #define	SEEK_SET	0	/* set file offset to offset */
 #define	SEEK_CUR	1	/* set file offset to current plus offset */
 #define	SEEK_END	2	/* set file offset to EOF plus offset */
+#define SEEK_DATA	3	/* Set file pointer to next data past offset */
+#define SEEK_HOLE	4	/* Set file pointer to next hole past offset */
 
 #if defined(_NETBSD_SOURCE)
 /* whence values for lseek(2); renamed by POSIX 1003.1 */
Index: sys/sys/vnode_if.h
===================================================================
RCS file: /cvsroot/src/sys/sys/vnode_if.h,v
retrieving revision 1.62
diff -u -p -r1.62 vnode_if.h
--- sys/sys/vnode_if.h	13 Jul 2006 12:00:26 -0000	1.62
+++ sys/sys/vnode_if.h	21 Sep 2006 01:04:21 -0000
@@ -225,11 +225,15 @@ struct vop_seek_args {
 	const struct vnodeop_desc *a_desc;
 	struct vnode *a_vp;
 	off_t a_oldoff;
-	off_t a_newoff;
+	int a_whence;
+	off_t a_offset;
+	off_t *a_newoff;
 	kauth_cred_t a_cred;
+	struct lwp *a_l;
 };
 extern const struct vnodeop_desc vop_seek_desc;
-int VOP_SEEK(struct vnode *, off_t, off_t, kauth_cred_t);
+int VOP_SEEK(struct vnode *, off_t, int, off_t, off_t *, kauth_cred_t,
+    struct lwp *);
 
 struct vop_remove_args {
 	const struct vnodeop_desc *a_desc;

--GvXjxJ+pjyke8COw--