Subject: RFC: addition of B_MANAGED and B_NESTED to buf.h and vfs_bio.c
To: None <tech-kern@NetBSD.org>
From: Reinoud Zandijk <reinoud@netbsd.org>
List: tech-kern
Date: 01/03/2006 17:34:27
--U+BazGySraz5kW0T
Content-Type: multipart/mixed; boundary="/9DWx/yDrRhgMJTb"
Content-Disposition: inline


--/9DWx/yDrRhgMJTb
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Dear folks,

I'd like to add two flags to buf.h. They add new functionality to the
buffer structure without changing current behaviour.

--------------------
    B_MANAGED
marks that the buffer's data memory is managed by the caller and should
not be put on a list for reuse or freed. Typically usable by, say, a
file system that keeps a descriptor or disc structure in private memory
rather than in a buffer, and wants to write it out without needing to
manipulate the VM or copy data around.

Managed buffers are claimed and released directly from/to the bufpool so
as not to destroy valuable data. Managed buffers can be brelse()'d.

    B_NESTED
marks the buffer as a nested buffer. Nested buffers are currently used in
genfs and lfs (get/put pages), though implemented there as two special-case
generated buffers with call-backs.

A nested buffer basically describes the action of the original buffer
over a piece of its buffer space. A series of nested buffers created from
the master buffer together describe the whole action. Nested buffers can
have their own call-back and variables too, extending the idea used in
genfs and lfs.

Buffer nesting is transparent. A callee doesn't need to know anything about
the mechanism and can easily nest the buffer again if it wants to.
biowait/biodone/brelse work as expected.

When a nested buffer is biodone()'d its master buffer will be updated. When 
the b_resid value of the master buffer then reaches zero, biodone() will be 
called on the master buffer.

Nested buffers are claimed and released directly from/to the bufpool so as
not to destroy valuable data. Nested buffers can be brelse()'d.

----------------

I've appended the patch. Note that this code has been tested for normal
operations but that the managed and nested buffers haven't actually been
used yet. I'd rather have feedback first before I change UDF to use the
buffer types. Genfs and lfs could probably best be done by their authors.

With regards,
Reinoud


--/9DWx/yDrRhgMJTb
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=PATCH

Index: kern/vfs_bio.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_bio.c,v
retrieving revision 1.148
diff -u -r1.148 vfs_bio.c
--- kern/vfs_bio.c	24 Dec 2005 19:12:23 -0000	1.148
+++ kern/vfs_bio.c	3 Jan 2006 16:05:47 -0000
@@ -904,6 +904,16 @@
 		wakeup(bp);
 	}
 
+	/* 
+	 * If it's a managed buffer, only recycle struct buf since it doesn't
+	 * own its data block and should not be cached or reused in the normal
+	 * way.
+	 */
+	if (ISSET(bp->b_flags, B_MANAGED)) {
+		bp->b_bufsize = 0;
+		goto clearup;
+	};
+
 	/*
 	 * Determine which queue the buffer should be on, then put it there.
 	 */
@@ -990,6 +1000,7 @@
 	CLR(bp->b_flags, B_AGE|B_ASYNC|B_BUSY|B_NOCACHE);
 	SET(bp->b_flags, B_CACHE);
 
+clearup:
 	/* Allow disk interrupts. */
 	simple_unlock(&bp->b_interlock);
 	simple_unlock(&bqueue_slock);
@@ -1003,6 +1014,85 @@
 }
 
 /*
+ * Get a new managed buffer. Memory assigned to it is NOT owned by the buffer
+ * cache but by the caller, typically a file system.
+ */
+struct buf *
+getmanagedbuf(int blkno, caddr_t base, int size)
+{
+	int s;
+	struct buf *bp;
+
+	KASSERT(base != NULL);
+	KASSERT(size > 0);
+
+	/*
+	 * Take a fresh struct buf straight from the pool so that no cached
+	 * buffer is recycled. PR_WAITOK may sleep, so bqueue_slock must not
+	 * be held here; no buffer queue is touched anyway.
+	 */
+	s = splbio();
+	bp = pool_get(&bufpool, PR_WAITOK);
+	splx(s);
+
+	/* Initialise the buffer; its data memory stays owned by the caller. */
+	memset(bp, 0, sizeof(*bp));
+	BUF_INIT(bp);
+	bp->b_flags |= B_MANAGED;
+
+	bp->b_blkno = blkno;
+	bp->b_data = base;
+	bp->b_bufsize = bp->b_bcount = bp->b_resid = size;
+
+	return bp;
+}
+
+/*
+ * Get a new buffer nested into the specified buffer, covering `size' bytes
+ * starting at `base'. NO read/write actions ought to be carried out on the
+ * master buffer anymore, only on the nested buffers, as they effectively
+ * split up the master buffer's action.
+ *
+ * Bug alert: make sure all nested buffers cover the complete mbp->b_resid
+ * space.  If space is to be skipped, mbp->b_resid needs to be accounted for or
+ * the biodone on mbp won't be called!
+ */
+struct buf *
+nestbuf(struct buf *mbp, int blkno, caddr_t base, int size)
+{
+	int s;
+	struct buf *bp;
+
+	KASSERT(mbp != NULL);
+	KASSERT(base != NULL);
+	KASSERT(size > 0);
+	/* The nested range must lie inside the master buffer's data. */
+	KASSERT(base >= mbp->b_data);
+	KASSERT(base + size <= mbp->b_data + mbp->b_bcount);
+
+	/*
+	 * Take a fresh struct buf from the pool rather than recycling a
+	 * cached one. PR_WAITOK may sleep, so no simple lock is held here.
+	 */
+	s = splbio();
+	bp = pool_get(&bufpool, PR_WAITOK);
+	splx(s);
+
+	/* Copy the master's state; mark as nested and drop its callback. */
+	memcpy(bp, mbp, sizeof(*bp));
+	bp->b_flags &= ~B_CALL;
+	bp->b_flags |= B_MANAGED | B_NESTED;
+	bp->b_masterbuf = mbp;
+	bp->b_blkno = blkno;
+	bp->b_data = base;
+	bp->b_bufsize = bp->b_bcount = bp->b_resid = size;
+
+	/* The copy must not share the master's softdep dependency list. */
+	LIST_INIT(&bp->b_dep);
+	return bp;
+}
+
+/*
  * Determine if a block is in the cache.
  * Just look on what would be its hash chain.  If it's there, return
  * a pointer to it, unless it's marked invalid.  If it's marked invalid,
@@ -1384,6 +1474,8 @@
 biodone(struct buf *bp)
 {
 	int s = splbio();
+	int  isnested;
+	struct buf *mbp;
 
 	simple_lock(&bp->b_interlock);
 	if (ISSET(bp->b_flags, B_DONE))
@@ -1397,6 +1489,18 @@
 	if (!ISSET(bp->b_flags, B_READ))	/* wake up reader */
 		vwakeup(bp);
 
+	/* Propagate status to master buffer if its a nested buffer */
+	isnested = ISSET(bp->b_flags, B_NESTED);
+	mbp = bp->b_masterbuf;
+	if (isnested) {
+		KASSERT(mbp);
+		if (bp->b_flags & B_ERROR) {
+			mbp->b_flags |= B_ERROR;
+			mbp->b_error = bp->b_error;
+		};
+		mbp->b_resid -= bp->b_bcount;
+	};
+
 	/*
 	 * If necessary, call out.  Unlock the buffer before calling
 	 * iodone() as the buffer isn't valid any more when it return.
@@ -1417,6 +1521,12 @@
 	}
 
 	splx(s);
+
+	/* call biodone on master buffer if its completed by this op */
+	if (isnested) {
+		if (mbp->b_resid == 0)
+			biodone(mbp);
+	};
 }
 
 /*
Index: sys/buf.h
===================================================================
RCS file: /cvsroot/src/sys/sys/buf.h,v
retrieving revision 1.84
diff -u -r1.84 buf.h
--- sys/buf.h	11 Dec 2005 12:25:20 -0000	1.84
+++ sys/buf.h	3 Jan 2006 16:05:47 -0000
@@ -145,6 +145,7 @@
 	struct	vnode *b_vp;		/* File vnode. */
 	struct  workhead b_dep;		/* List of filesystem dependencies. */
 	void	*b_saveaddr;		/* Original b_addr for physio. */
+	struct buf *b_masterbuf;	/* If nested, points to masterbuf */
 
 	/*
 	 * private data for owner.
@@ -213,11 +214,14 @@
 #define	B_WRITE		0x00000000	/* Write buffer (pseudo flag). */
 #define	B_XXX		0x02000000	/* Debugging flag. */
 #define	B_VFLUSH	0x04000000	/* Buffer is being synced. */
+#define B_MANAGED	0x08000000	/* Buffer's memory is not its own */
+#define B_NESTED	0x10000000	/* Buffer is not standalone */
 
 #define BUF_FLAGBITS \
     "\20\1AGE\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI" \
     "\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \
-    "\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED\32XXX\33VFLUSH"
+    "\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED\32XXX\33VFLUSH\34MANAGED" \
+    "\35NESTED"
 
 
 /*
@@ -289,6 +293,8 @@
 struct buf *geteblk(int);
 struct buf *getnewbuf(int, int, int);
 struct buf *incore(struct vnode *, daddr_t);
+struct buf *getmanagedbuf(int, caddr_t, int);
+struct buf *nestbuf(struct buf *, int, caddr_t, int);
 
 void	minphys(struct buf *);
 int	physio(void (*)(struct buf *), struct buf *, dev_t, int,

--/9DWx/yDrRhgMJTb--

--U+BazGySraz5kW0T
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.6 (NetBSD)

iQEVAwUBQ7qnjIKcNwBDyKpoAQJEewf/cjlMYY8WHQtuzbjuNCdoMBjSvXFAInJt
+0fYmhFXNvqvviNij2Xy58EIFuL7lJZPFeG6zLe5fxaC++hNJX+NIPhi7f5zYCt0
RoUdf/STMk54ylTgzNtZbYnkgp0XC1+7FfgyVpzwmzMkfDDidq/bhHWbz+EQMxBT
0rzbrV5tfHjQ/TqNKxOmIxyhquJlmYeYoA9mLT8odstiYzj433X9DFF08hKNPMxe
75pnDA5ZdiVsVx9VOU1jrlENEGnDqh0QDJeQrSPz4V6qxgrAznK8wns5okHTTMaQ
3p8zVV9APgzGOw3LZhJGNugcVJE9D99Czg+Cl403KKa0fD4DHmDGhA==
=6R6f
-----END PGP SIGNATURE-----

--U+BazGySraz5kW0T--