Subject: RFC: addition of B_MANAGED and B_NESTED to buf.h and vfs_bio.c
To: None <tech-kern@NetBSD.org>
From: Reinoud Zandijk <reinoud@netbsd.org>
List: tech-kern
Date: 01/03/2006 17:34:27
--U+BazGySraz5kW0T
Content-Type: multipart/mixed; boundary="/9DWx/yDrRhgMJTb"
Content-Disposition: inline
--/9DWx/yDrRhgMJTb
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Dear folks,
i'd like to add two flags to buf.h. They add new functionality to the
buffer structure without changing current behaviour.
--------------------
B_MANAGED
marks that its buffer contents memory is managed by the caller and should
not be put on a list for reuse or freed. Normally useable by say a
filingsystem that has a private memory buffer for say a descriptor or disc
structure thats not kept in a buffer and wants to write it out without the
need to manipulate the VM or to copy stuff around.
Managed buffers are claimed and released directly from/to the bufpool to
not destroy valueable data. Managed buffers can be brelse()'d.
B_NESTED
marks the buffer as a nested buffer. Nested buffers are currently used in
genfs and lfs (get/put pages) though implemented as two special case
generated buffers with call-backs.
A nested buffer is basicly describing the action of the origional buffer
over a piece of its buffer space. A series of nested buffers created from
the master buffer describe together the action. Nested buffers can have
their own call-back and variables too extending the idea used in genfs and
lfs.
Buffer nesting is transparant. A callee doesn't need to know anything of
the mechanism and can easily nest it again if it wants too.
biowait/biodone/brelse work as expected.
When a nested buffer is biodone()'d its master buffer will be updated. When
the b_resid value of the master buffer then reaches zero, biodone() will be
called on the master buffer.
Nested buffers are claimed and released directly from/to the bufpool to not
destroy valueable data. Nested buffers can be brelse()'d.
----------------
I've appended the patch. Note that this code has been tested for normal
operations but that the managed and nested buffers haven't actually been
used yet. I'd rather have feedback first before i change UDF to use the
buffer types. Genfs and lfs could prolly best be done by their authors.
With regards,
Reinoud
--/9DWx/yDrRhgMJTb
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=PATCH
Index: kern/vfs_bio.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_bio.c,v
retrieving revision 1.148
diff -u -r1.148 vfs_bio.c
--- kern/vfs_bio.c 24 Dec 2005 19:12:23 -0000 1.148
+++ kern/vfs_bio.c 3 Jan 2006 16:05:47 -0000
@@ -904,6 +904,16 @@
wakeup(bp);
}
+ /*
+ * If it's a managed buffer, only recycle struct buf since it doesn't
+ * own its data block and should not be cached or reused in the normal
+ * way.
+ */
+ if (ISSET(bp->b_flags, B_MANAGED)) {
+ bp->b_bufsize = 0;
+ goto clearup;
+ };
+
/*
* Determine which queue the buffer should be on, then put it there.
*/
@@ -990,6 +1000,7 @@
CLR(bp->b_flags, B_AGE|B_ASYNC|B_BUSY|B_NOCACHE);
SET(bp->b_flags, B_CACHE);
+clearup:
/* Allow disk interrupts. */
simple_unlock(&bp->b_interlock);
simple_unlock(&bqueue_slock);
@@ -1003,6 +1014,85 @@
}
/*
+ * Get a new managed buffer. Memory assigned to it is NOT owned by the buffer
+ * cache but by the caller, typically a filingsystem.
+ */
+struct buf *
+getmanagedbuf(int blkno, caddr_t base, int size)
+{
+ int s;
+ struct buf *bp;
+
+ KASSERT(base != NULL);
+ KASSERT(size > 0);
+
+ s = splbio();
+ simple_lock(&bqueue_slock);
+
+ /* please don't destroy valueable data */
+ bp = pool_get(&bufpool, PR_WAITOK);
+
+ /* Initialise buffer */
+ memset(bp, 0, sizeof(struct buf));
+ BUF_INIT(bp);
+ bp->b_flags |= B_MANAGED;
+
+ bp->b_blkno = blkno;
+ bp->b_data = base;
+ bp->b_bufsize = bp->b_bcount = bp->b_resid = size;
+
+ simple_unlock(&bqueue_slock);
+ splx(s);
+
+ return bp;
+}
+
+/*
+ * Get a new buffer nested into the specified buffer at the given offset and
+ * length. NO read/write actions ought to be caried out on the master buffer
+ * anymore only on the nested buffers as they effectively split up the master
+ * buffer's action.
+ *
+ * Bug alert: make sure all nested buffers cover the complete mbp->resid
+ * space. If space is to be skipped, mbp->resid needs to be accounted for or
+ * the biodone on mbp won't be called!
+ */
+struct buf *
+nestbuf(struct buf *mbp, int blkno, caddr_t base, int size)
+{
+ int s;
+ struct buf *bp;
+
+ KASSERT(base != NULL);
+ KASSERT(size > 0);
+ KASSERT((mpb==NULL) || (mpb && mbp->b_count < offset+size));
+
+ s = splbio();
+ simple_lock(&bqueue_slock);
+
+ /* please don't destroy valueable data */
+ bp = pool_get(&bufpool, PR_WAITOK);
+
+ /*
+ * Adjust relevant information from the master buffer. set nested info
+ * and clear callback info and softdep info.
+ */
+ memcpy(bp, mbp, sizeof(struct buf));
+ bp->b_flags &= ~B_CALL;
+ bp->b_flags |= B_MANAGED | B_NESTED;
+ bp->b_masterbuf = mbp;
+
+ bp->b_blkno = blkno;
+ bp->b_data = base;
+ bp->b_bufsize = bp->b_bcount = bp->b_resid = size;
+
+ /* avoid confusion for softdep? */
+ LIST_INIT(&bp->b_dep);
+
+ return bp;
+}
+
+/*
* Determine if a block is in the cache.
* Just look on what would be its hash chain. If it's there, return
* a pointer to it, unless it's marked invalid. If it's marked invalid,
@@ -1384,6 +1474,8 @@
biodone(struct buf *bp)
{
int s = splbio();
+ int isnested;
+ struct buf *mbp;
simple_lock(&bp->b_interlock);
if (ISSET(bp->b_flags, B_DONE))
@@ -1397,6 +1489,18 @@
if (!ISSET(bp->b_flags, B_READ)) /* wake up reader */
vwakeup(bp);
+ /* Propagate status to master buffer if its a nested buffer */
+ isnested = ISSET(bp->b_flags, B_NESTED);
+ mbp = bp->b_masterbuf;
+ if (isnested) {
+ KASSERT(mbp);
+ if (bp->b_flags & B_ERROR) {
+ mbp->b_flags |= B_ERROR;
+ mbp->b_error = bp->b_error;
+ };
+ mbp->b_resid -= bp->b_bcount;
+ };
+
/*
* If necessary, call out. Unlock the buffer before calling
* iodone() as the buffer isn't valid any more when it return.
@@ -1417,6 +1521,12 @@
}
splx(s);
+
+ /* call biodone on master buffer if its completed by this op */
+ if (isnested) {
+ if (mbp->b_resid == 0)
+ biodone(mbp);
+ };
}
/*
Index: sys/buf.h
===================================================================
RCS file: /cvsroot/src/sys/sys/buf.h,v
retrieving revision 1.84
diff -u -r1.84 buf.h
--- sys/buf.h 11 Dec 2005 12:25:20 -0000 1.84
+++ sys/buf.h 3 Jan 2006 16:05:47 -0000
@@ -145,6 +145,7 @@
struct vnode *b_vp; /* File vnode. */
struct workhead b_dep; /* List of filesystem dependencies. */
void *b_saveaddr; /* Original b_addr for physio. */
+ struct buf *b_masterbuf; /* If nested, points to masterbuf */
/*
* private data for owner.
@@ -213,11 +214,14 @@
#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */
#define B_XXX 0x02000000 /* Debugging flag. */
#define B_VFLUSH 0x04000000 /* Buffer is being synced. */
+#define B_MANAGED 0x08000000 /* Buffer's memory is not its own */
+#define B_NESTED 0x10000000 /* Buffer is not standalone */
#define BUF_FLAGBITS \
"\20\1AGE\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI" \
"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \
- "\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED\32XXX\33VFLUSH"
+ "\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED\32XXX\33VFLUSH\34MANAGED" \
+ "\35NESTED"
/*
@@ -289,6 +293,8 @@
struct buf *geteblk(int);
struct buf *getnewbuf(int, int, int);
struct buf *incore(struct vnode *, daddr_t);
+struct buf *getmanagedbuf(int, caddr_t, int);
+struct buf *nestbuf(struct buf *, int, caddr_t, int);
void minphys(struct buf *);
int physio(void (*)(struct buf *), struct buf *, dev_t, int,
--/9DWx/yDrRhgMJTb--
--U+BazGySraz5kW0T
Content-Type: application/pgp-signature
Content-Disposition: inline
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.6 (NetBSD)
iQEVAwUBQ7qnjIKcNwBDyKpoAQJEewf/cjlMYY8WHQtuzbjuNCdoMBjSvXFAInJt
+0fYmhFXNvqvviNij2Xy58EIFuL7lJZPFeG6zLe5fxaC++hNJX+NIPhi7f5zYCt0
RoUdf/STMk54ylTgzNtZbYnkgp0XC1+7FfgyVpzwmzMkfDDidq/bhHWbz+EQMxBT
0rzbrV5tfHjQ/TqNKxOmIxyhquJlmYeYoA9mLT8odstiYzj433X9DFF08hKNPMxe
75pnDA5ZdiVsVx9VOU1jrlENEGnDqh0QDJeQrSPz4V6qxgrAznK8wns5okHTTMaQ
3p8zVV9APgzGOw3LZhJGNugcVJE9D99Czg+Cl403KKa0fD4DHmDGhA==
=6R6f
-----END PGP SIGNATURE-----
--U+BazGySraz5kW0T--