Subject: Re: genfs_putpages with PGO_SYNCIO
To: None <enami@sm.sony.co.jp>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 01/22/2003 21:38:02
--NextPart-20030122212753-0037300
Content-Type: Text/Plain; charset=us-ascii
> > Yes. IMHO, they (the new one vs. struct buf and the bio* routines) describe
> > different things (the former is related only to GOP_PUTPAGES and
> > GOP_WRITE). E.g., if one wanted to limit the number of async
> > writes issued at a time, the code would wait in the middle of the loop in
> > the _putpages routine. For this, not using struct buf will be cleaner.
I made a new patch. How about this one?
YAMAMOTO Takashi
--NextPart-20030122212753-0037300
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="genfs.syncio6.diff"
Index: sys/buf.h
===================================================================
RCS file: /cvs/NetBSD/src/sys/sys/buf.h,v
retrieving revision 1.55
diff -u -p -r1.55 buf.h
--- sys/buf.h 2002/10/06 17:05:56 1.55
+++ sys/buf.h 2003/01/22 12:34:32
@@ -217,6 +217,7 @@ struct buf {
#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */
#define B_XXX 0x02000000 /* Debugging flag. */
#define B_VFLUSH 0x04000000 /* Buffer is being synced. */
+#define B_STABLE 0x08000000 /* Data should go to stable storage */
/*
* This structure describes a clustered I/O. It is stored in the b_saveaddr
Index: nfs/nfs_bio.c
===================================================================
RCS file: /cvs/NetBSD/src/sys/nfs/nfs_bio.c,v
retrieving revision 1.86
diff -u -p -r1.86 nfs_bio.c
--- nfs/nfs_bio.c 2003/01/18 09:34:30 1.86
+++ nfs/nfs_bio.c 2003/01/22 12:34:32
@@ -991,7 +991,7 @@ nfs_doio(bp, p)
struct vm_page *pgs[npages];
boolean_t needcommit = TRUE;
- if ((bp->b_flags & B_ASYNC) != 0 && NFS_ISV3(vp)) {
+ if ((bp->b_flags & B_STABLE) == 0 && NFS_ISV3(vp)) {
iomode = NFSV3WRITE_UNSTABLE;
} else {
iomode = NFSV3WRITE_FILESYNC;
Index: nfs/nfs_node.c
===================================================================
RCS file: /cvs/NetBSD/src/sys/nfs/nfs_node.c,v
retrieving revision 1.56
diff -u -p -r1.56 nfs_node.c
--- nfs/nfs_node.c 2002/12/01 23:02:10 1.56
+++ nfs/nfs_node.c 2003/01/22 12:34:32
@@ -79,7 +79,8 @@ extern int prtactive;
void nfs_gop_size(struct vnode *, off_t, off_t *);
int nfs_gop_alloc(struct vnode *, off_t, off_t, int, struct ucred *);
-int nfs_gop_write(struct vnode *, struct vm_page **, int, int);
+int nfs_gop_write(struct vnode *, struct vm_page **, int, int,
+ struct genfs_iodesc *);
struct genfs_ops nfs_genfsops = {
nfs_gop_size,
@@ -323,12 +324,13 @@ nfs_gop_alloc(struct vnode *vp, off_t of
}
int
-nfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
+nfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags,
+ struct genfs_iodesc *iodesc)
{
int i;
for (i = 0; i < npages; i++) {
pmap_page_protect(pgs[i], VM_PROT_READ);
}
- return genfs_gop_write(vp, pgs, npages, flags);
+ return genfs_gop_write(vp, pgs, npages, flags, iodesc);
}
Index: nfs/nfsnode.h
===================================================================
RCS file: /cvs/NetBSD/src/sys/nfs/nfsnode.h,v
retrieving revision 1.37
diff -u -p -r1.37 nfsnode.h
--- nfs/nfsnode.h 2002/12/01 23:02:11 1.37
+++ nfs/nfsnode.h 2003/01/22 12:34:33
@@ -244,7 +244,8 @@ int nfs_truncate __P((void *));
int nfs_update __P((void *));
int nfs_getpages __P((void *));
int nfs_putpages __P((void *));
-int nfs_gop_write(struct vnode *, struct vm_page **, int, int);
+int nfs_gop_write(struct vnode *, struct vm_page **, int, int,
+ struct genfs_iodesc *);
int nfs_kqfilter __P((void *));
extern int (**nfsv2_vnodeop_p) __P((void *));
Index: miscfs/genfs/genfs_node.h
===================================================================
RCS file: /cvs/NetBSD/src/sys/miscfs/genfs/genfs_node.h,v
retrieving revision 1.3
diff -u -p -r1.3 genfs_node.h
--- miscfs/genfs/genfs_node.h 2001/12/18 07:49:36 1.3
+++ miscfs/genfs/genfs_node.h 2003/01/22 12:34:33
@@ -35,18 +35,24 @@
struct vm_page;
+struct genfs_iodesc {
+ volatile int gid_iocount; /* number of pending i/o */
+ int gid_error; /* error number if any */
+};
+
struct genfs_ops {
void (*gop_size)(struct vnode *, off_t, off_t *);
int (*gop_alloc)(struct vnode *, off_t, off_t, int, struct ucred *);
- int (*gop_write)(struct vnode *, struct vm_page **, int, int);
+ int (*gop_write)(struct vnode *, struct vm_page **, int, int,
+ struct genfs_iodesc *);
};
#define GOP_SIZE(vp, size, eobp) \
(*VTOG(vp)->g_op->gop_size)((vp), (size), (eobp))
#define GOP_ALLOC(vp, off, len, flags, cred) \
(*VTOG(vp)->g_op->gop_alloc)((vp), (off), (len), (flags), (cred))
-#define GOP_WRITE(vp, pgs, npages, flags) \
- (*VTOG(vp)->g_op->gop_write)((vp), (pgs), (npages), (flags))
+#define GOP_WRITE(vp, pgs, npages, flags, iodesc) \
+ (*VTOG(vp)->g_op->gop_write)((vp), (pgs), (npages), (flags), (iodesc))
struct genfs_node {
struct genfs_ops *g_op; /* ops vector */
@@ -57,7 +63,12 @@ struct genfs_node {
void genfs_size(struct vnode *, off_t, off_t *);
void genfs_node_init(struct vnode *, struct genfs_ops *);
-int genfs_gop_write(struct vnode *, struct vm_page **, int, int);
-int genfs_compat_gop_write(struct vnode *, struct vm_page **, int, int);
+int genfs_gop_write(struct vnode *, struct vm_page **, int, int,
+ struct genfs_iodesc *);
+int genfs_compat_gop_write(struct vnode *, struct vm_page **, int, int,
+ struct genfs_iodesc *);
+
+void genfs_aio_biodone(struct buf *);
+int genfs_iowait(const struct genfs_iodesc *);
#endif /* _MISCFS_GENFS_GENFS_NODE_H_ */
Index: miscfs/genfs/genfs_vnops.c
===================================================================
RCS file: /cvs/NetBSD/src/sys/miscfs/genfs/genfs_vnops.c,v
retrieving revision 1.70
diff -u -p -r1.70 genfs_vnops.c
--- miscfs/genfs/genfs_vnops.c 2003/01/21 00:01:14 1.70
+++ miscfs/genfs/genfs_vnops.c 2003/01/22 12:34:33
@@ -1028,6 +1028,7 @@ genfs_putpages(void *v)
struct vnode *vp = ap->a_vp;
struct uvm_object *uobj = &vp->v_uobj;
struct simplelock *slock = &uobj->vmobjlock;
+ struct genfs_iodesc *iodesc = NULL;
off_t startoff = ap->a_offlo;
off_t endoff = ap->a_offhi;
off_t off;
@@ -1039,6 +1040,7 @@ genfs_putpages(void *v)
boolean_t wasclean, by_list, needs_clean, yield;
boolean_t async = (flags & PGO_SYNCIO) == 0;
boolean_t pagedaemon = curproc == uvm.pagedaemon_proc;
+ boolean_t metbusy = FALSE;
struct lwp *l = curlwp ? curlwp : &lwp0;
UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
@@ -1108,18 +1110,28 @@ genfs_putpages(void *v)
KASSERT(pg == NULL ||
(pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
(pg->flags & PG_BUSY) != 0);
+
if (by_list) {
+ boolean_t outofrange;
+
if (pg == &endmp) {
break;
}
- if (pg->offset < startoff || pg->offset >= endoff ||
+
+ outofrange = (pg->offset < startoff ||
+ pg->offset >= endoff);
+ if (outofrange ||
pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
+ if (!outofrange)
+ metbusy = TRUE;
pg = TAILQ_NEXT(pg, listq);
continue;
}
off = pg->offset;
} else if (pg == NULL ||
pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
+ if (pg != NULL)
+ metbusy = TRUE;
off += PAGE_SIZE;
if (off < endoff) {
pg = uvm_pagelookup(uobj, off);
@@ -1287,7 +1299,22 @@ genfs_putpages(void *v)
listq);
}
simple_unlock(slock);
- error = GOP_WRITE(vp, pgs, npages, flags);
+ if (!async && iodesc == NULL) {
+ /*
+ * XXX should use stack or pool?
+ */
+ MALLOC(iodesc, struct genfs_iodesc *,
+ sizeof(*iodesc), M_TEMP, M_WAITOK);
+ /*
+ * set gid_iocount to 1 now so that
+ * i/o completion event can't occur
+ * before we issue all writes.
+ * see below.
+ */
+ iodesc->gid_iocount = 1;
+ iodesc->gid_error = 0;
+ }
+ error = GOP_WRITE(vp, pgs, npages, flags, iodesc);
simple_lock(slock);
if (by_list) {
pg = TAILQ_NEXT(&curmp, listq);
@@ -1338,23 +1365,45 @@ genfs_putpages(void *v)
vp->v_flag &= ~VONWORKLST;
LIST_REMOVE(vp, v_synclist);
}
- splx(s);
- if (!wasclean && !async) {
- s = splbio();
+ if (iodesc != NULL && metbusy) {
+ KASSERT(!async);
+ KASSERT(!wasclean);
+
+ /*
+ * If we met pages being paged out by other threads,
+ * we should wait for all outputs.
+ */
while (vp->v_numoutput != 0) {
vp->v_flag |= VBWAIT;
UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, slock, FALSE,
"genput2", 0);
simple_lock(slock);
}
+ /*
+ * As we drained all writes for this vnode,
+ * ours was also completed, of course.
+ */
+ KASSERT(iodesc->gid_iocount == 1);
+ }
+ splx(s);
+ simple_unlock(slock);
+ if (iodesc != NULL) {
+ KASSERT(!async);
+ KASSERT(!wasclean);
+
+ s = splbio();
+ KASSERT(iodesc->gid_iocount >= 1);
+ iodesc->gid_iocount--;
+ error = genfs_iowait(iodesc);
splx(s);
+ FREE(iodesc, M_TEMP);
}
- simple_unlock(&uobj->vmobjlock);
return (error);
}
int
-genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
+genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags,
+ struct genfs_iodesc *iodesc)
{
int s, error, run;
int fs_bshift, dev_bshift;
@@ -1367,6 +1416,7 @@ genfs_gop_write(struct vnode *vp, struct
struct vnode *devvp;
boolean_t async = (flags & PGO_SYNCIO) == 0;
UVMHIST_FUNC("genfs_gop_write"); UVMHIST_CALLED(ubchist);
+ KASSERT((async && iodesc == NULL) || (!async && iodesc != NULL));
UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
vp, pgs, npages, flags);
@@ -1398,10 +1448,19 @@ genfs_gop_write(struct vnode *vp, struct
mbp->b_bufsize = npages << PAGE_SHIFT;
mbp->b_data = (void *)kva;
mbp->b_resid = mbp->b_bcount = bytes;
- mbp->b_flags = B_BUSY|B_WRITE|B_AGE| (async ? (B_CALL|B_ASYNC) : 0);
- mbp->b_iodone = uvm_aio_biodone;
+ mbp->b_flags = B_BUSY|B_WRITE|B_AGE|B_CALL|B_ASYNC;
mbp->b_vp = vp;
LIST_INIT(&mbp->b_dep);
+ if (iodesc) {
+ mbp->b_iodone = genfs_aio_biodone;
+ mbp->b_private = iodesc;
+ s = splbio();
+ iodesc->gid_iocount++;
+ splx(s);
+ }
+ else {
+ mbp->b_iodone = uvm_aio_biodone;
+ }
bp = NULL;
for (offset = startoffset;
@@ -1438,11 +1497,11 @@ genfs_gop_write(struct vnode *vp, struct
bp->b_resid = bp->b_bcount = iobytes;
bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
bp->b_iodone = uvm_aio_biodone1;
+ bp->b_private = mbp;
bp->b_vp = vp;
LIST_INIT(&bp->b_dep);
}
bp->b_lblkno = 0;
- bp->b_private = mbp;
if (devvp->v_type == VBLK) {
bp->b_dev = devvp->v_rdev;
}
@@ -1450,6 +1509,9 @@ genfs_gop_write(struct vnode *vp, struct
/* adjust physical blkno for partial blocks */
bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
dev_bshift);
+ if (!async) {
+ bp->b_flags |= B_STABLE;
+ }
UVMHIST_LOG(ubchist,
"vp %p offset 0x%x bcount 0x%x blkno 0x%x",
vp, offset, bp->b_bcount, bp->b_blkno);
@@ -1472,9 +1534,10 @@ genfs_gop_write(struct vnode *vp, struct
UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
return (0);
}
- UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
- error = biowait(mbp);
- uvm_aio_aiodone(mbp);
+ /*
+ * We don't have to wait for I/O completions even for PGO_SYNCIO
+ * because genfs_putpages will wait for us.
+ */
UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
return (error);
}
@@ -1607,9 +1670,10 @@ genfs_compat_getpages(void *v)
return (error);
}
+/* ARGSUSED */
int
genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages,
- int flags)
+ int flags, struct genfs_iodesc *iodesc)
{
off_t offset;
struct iovec iov;
@@ -1730,4 +1794,48 @@ genfs_kqfilter(void *v)
SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext);
return (0);
+}
+
+/*
+ * biodone routine for GOP_WRITE.
+ *
+ * must be called at splbio.
+ * XXX interlock
+ */
+void
+genfs_aio_biodone(struct buf *bp)
+{
+ struct genfs_iodesc *iodesc = bp->b_private;
+
+ KASSERT(iodesc->gid_iocount > 0);
+
+ if (bp->b_flags & B_ERROR) {
+ iodesc->gid_error = (bp->b_error ? bp->b_error : EIO);
+ }
+
+ if ((--iodesc->gid_iocount) == 0) {
+ wakeup(iodesc);
+ }
+
+ uvm_aio_biodone(bp);
+}
+
+/*
+ * wait for i/o completion.
+ *
+ * must be called at splbio.
+ * XXX interlock
+ */
+int
+genfs_iowait(const struct genfs_iodesc *iodesc)
+{
+
+ while (iodesc->gid_iocount > 0) {
+ /* LINTED const cast away */
+ void *waitpoint = (void *)iodesc;
+
+ (void)ltsleep(waitpoint, PRIBIO + 1, "giowait", 0, 0);
+ }
+
+ return iodesc->gid_error;
}
--NextPart-20030122212753-0037300--