Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src Serialize access to file size. We already have such a thing ...



details:   https://anonhg.NetBSD.org/src/rev/a04dbb880878
branches:  trunk
changeset: 769428:a04dbb880878
user:      manu <manu%NetBSD.org@localhost>
date:      Fri Sep 09 15:45:28 2011 +0000

description:
Serialize access to file size. We already have such a thing in the
kernel, where it fixes a race for PUFFS filesystems, but we need it again
in perfused since FUSE filesystems are allowed to reorder requests.

The huge issue is in the asynchronous SETATTR sent by fsync. It is
followed by a synchronous FSYNC, so if the filesystem does not reorder
requests, once the FSYNC returns, we are confident the SETATTR is done.
But since FUSE can reorder, we need to implement sync in perfused.

diffstat:

 lib/libperfuse/ops.c          |  136 ++++++++++++++++++++++++++++++++++-------
 lib/libperfuse/perfuse_if.h   |   27 ++++----
 lib/libperfuse/perfuse_priv.h |    6 +-
 usr.sbin/perfused/Makefile    |    4 +-
 usr.sbin/perfused/perfused.c  |    4 +-
 5 files changed, 134 insertions(+), 43 deletions(-)

diffs (truncated from 390 to 300 lines):

diff -r 55ec8ebbf996 -r a04dbb880878 lib/libperfuse/ops.c
--- a/lib/libperfuse/ops.c      Fri Sep 09 15:35:22 2011 +0000
+++ b/lib/libperfuse/ops.c      Fri Sep 09 15:45:28 2011 +0000
@@ -1,4 +1,4 @@
-/*  $NetBSD: ops.c,v 1.39 2011/08/13 23:12:15 christos Exp $ */
+/*  $NetBSD: ops.c,v 1.40 2011/09/09 15:45:28 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
@@ -1487,14 +1487,22 @@
 {
        perfuse_msg_t *pm;
        struct perfuse_state *ps;
+       struct perfuse_node_data *pnd = PERFUSE_NODE_DATA(opc);
        struct fuse_getattr_in *fgi;
        struct fuse_attr_out *fao;
        u_quad_t va_size;
        int error;
        
-       if (PERFUSE_NODE_DATA(opc)->pnd_flags & PND_REMOVED)
+       if (pnd->pnd_flags & PND_REMOVED)
                return ENOENT;
 
+       /* 
+        * Serialize size access, see comment in perfuse_node_setattr().
+        */
+       while (pnd->pnd_flags & PND_INRESIZE)
+               requeue_request(pu, opc, PCQ_RESIZE);
+       pnd->pnd_flags |= PND_INRESIZE;
+
        ps = puffs_getspecific(pu);
        va_size = vap->va_size;
 
@@ -1513,11 +1521,22 @@
                fgi->getattr_flags |= FUSE_GETATTR_FH;
        }
 
+#ifdef PERFUSE_DEBUG
+       if (perfuse_diagflags & PDF_RESIZE)
+               DPRINTF(">> %s %p %lld\n", __func__, (void *)opc, va_size);
+#endif
+
        if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
                goto out;
 
        fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
 
+#ifdef PERFUSE_DEBUG
+       if (perfuse_diagflags & PDF_RESIZE)
+               DPRINTF("<< %s %p %lld -> %lld\n", __func__, (void *)opc, 
+                       va_size, fao->attr.size);
+#endif
+
        /* 
         * The message from filesystem has a cache timeout
         * XXX this is ignored yet, is that right?
@@ -1528,16 +1547,12 @@
         */
        fuse_attr_to_vap(ps, vap, &fao->attr);
 
-       /*
-        * If a write is in progress, do not trust filesystem opinion 
-        * of file size, use the one from kernel.
-        */
-       if ((PERFUSE_NODE_DATA(opc)->pnd_flags & PND_INWRITE) &&
-           (va_size != (u_quad_t)PUFFS_VNOVAL))
-               vap->va_size = MAX(va_size, vap->va_size);;
 out:
        ps->ps_destroy_msg(pm);
 
+       pnd->pnd_flags &= ~PND_INRESIZE;
+       (void)dequeue_requests(ps, opc, PCQ_RESIZE, DEQUEUE_ALL);
+
        return error;
 }
 
@@ -1555,8 +1570,11 @@
        struct fuse_setattr_in *fsi;
        struct fuse_attr_out *fao;
        struct vattr *old_va;
-       u_quad_t va_size;
        int error;
+#ifdef PERFUSE_DEBUG
+       struct vattr *old_vap;
+       int resize_debug = 0;
+#endif
 
        ps = puffs_getspecific(pu);
        pnd = PERFUSE_NODE_DATA(opc);
@@ -1611,16 +1629,6 @@
                                old_va->va_type, vap->va_mode, pcr)) != 0)
                return EACCES;
        
-       /*
-        * If a write is in progress, set the highest
-        * value in the filesystem, otherwise we break 
-        * IO_APPEND.
-        */
-       va_size = vap->va_size;
-       if ((pnd->pnd_flags & PND_INWRITE) &&
-           (va_size != (u_quad_t)PUFFS_VNOVAL))
-               va_size = MAX(va_size, old_va->va_size);
-
        pm = ps->ps_new_msg(pu, opc, FUSE_SETATTR, sizeof(*fsi), pcr);
        fsi = GET_INPAYLOAD(ps, pm, fuse_setattr_in);
        fsi->valid = 0;
@@ -1634,9 +1642,19 @@
                fsi->valid |= FUSE_FATTR_FH;
        }
 
-       if (va_size != (u_quad_t)PUFFS_VNOVAL) {
-               fsi->size = va_size;
+       if (vap->va_size != (u_quad_t)PUFFS_VNOVAL) {
+               fsi->size = vap->va_size;
                fsi->valid |= FUSE_FATTR_SIZE;
+
+               /* 
+                * Serialize anything that can touch file size
+                * to avoid reordered GETATTR and SETATTR.
+                * Out of order SETATTR can report stale size,
+                * which will cause the kernel to truncate the file.
+                */
+               while (pnd->pnd_flags & PND_INRESIZE)
+                       requeue_request(pu, opc, PCQ_RESIZE);
+               pnd->pnd_flags |= PND_INRESIZE;
        }
 
        /*
@@ -1696,7 +1714,7 @@
         * Try to adapt and remove FATTR_ATIME|FATTR_MTIME
         * if we suspect a ftruncate().
         */ 
-       if ((va_size != (u_quad_t)PUFFS_VNOVAL) &&
+       if ((vap->va_size != (u_quad_t)PUFFS_VNOVAL) &&
            ((vap->va_mode == (mode_t)PUFFS_VNOVAL) &&
             (vap->va_uid == (uid_t)PUFFS_VNOVAL) &&
             (vap->va_gid == (gid_t)PUFFS_VNOVAL))) {
@@ -1716,6 +1734,19 @@
                goto out;
        }
 
+#ifdef PERFUSE_DEBUG
+       old_vap = puffs_pn_getvap((struct puffs_node *)opc);
+
+       if ((perfuse_diagflags & PDF_RESIZE) &&
+           (old_vap->va_size != (u_quad_t)PUFFS_VNOVAL)) {
+               resize_debug = 1;
+
+               DPRINTF(">> %s %p %lld -> %lld\n", __func__, (void *)opc, 
+                       puffs_pn_getvap((struct puffs_node *)opc)->va_size, 
+                       fsi->size);
+       }
+#endif
+
        if ((error = xchg_msg(pu, opc, pm, sizeof(*fao), wait_reply)) != 0)
                goto out;
 
@@ -1723,12 +1754,23 @@
         * Copy back the new values 
         */
        fao = GET_OUTPAYLOAD(ps, pm, fuse_attr_out);
+
+#ifdef PERFUSE_DEBUG
+       if (resize_debug)
+               DPRINTF("<< %s %p %lld -> %lld\n", __func__, (void *)opc, 
+                       old_vap->va_size, fao->attr.size);
+#endif
+
        fuse_attr_to_vap(ps, old_va, &fao->attr);
 out:
-
        if (pm != NULL)
                ps->ps_destroy_msg(pm);
 
+       if (pnd->pnd_flags & PND_INRESIZE) {
+               pnd->pnd_flags &= ~PND_INRESIZE;
+               (void)dequeue_requests(ps, opc, PCQ_RESIZE, DEQUEUE_ALL);
+       }
+
        return error;
 }
 
@@ -2781,6 +2823,7 @@
 {
        struct perfuse_state *ps;
        struct perfuse_node_data *pnd;
+       const struct vattr *vap;
        perfuse_msg_t *pm;
        struct fuse_read_in *fri;
        struct fuse_out_header *foh;
@@ -2789,8 +2832,13 @@
        
        ps = puffs_getspecific(pu);
        pnd = PERFUSE_NODE_DATA(opc);
+       vap = puffs_pn_getvap((struct puffs_node *)opc);
        pm = NULL;
 
+       if (offset + *resid > vap->va_size)
+               DWARNX("%s %p read %lld@%d beyond EOF %lld\n",
+                      __func__, (void *)opc, offset, *resid, vap->va_size);
+
        do {
                size_t max_read;
 
@@ -2869,12 +2917,14 @@
        size_t data_len;
        size_t payload_len;
        size_t written;
+       int inresize;
        int error;
        
        ps = puffs_getspecific(pu);
        pnd = PERFUSE_NODE_DATA(opc);
        vap = puffs_pn_getvap((struct puffs_node *)opc);
        written = 0;
+       inresize = 0;
        pm = NULL;
 
        if (vap->va_type == VDIR) 
@@ -2888,11 +2938,23 @@
                requeue_request(pu, opc, PCQ_WRITE);
        pnd->pnd_flags |= PND_INWRITE;
 
+       /* 
+        * Serialize size access, see comment in perfuse_node_setattr().
+        */
+       if (offset + *resid > vap->va_size) {
+               while (pnd->pnd_flags & PND_INRESIZE)
+                       requeue_request(pu, opc, PCQ_RESIZE);
+               pnd->pnd_flags |= PND_INRESIZE;
+               inresize = 1;
+       }
+
        /*
         * append flag: re-read the file size so that 
         * we get the latest value.
         */
        if (ioflag & PUFFS_IO_APPEND) {
+               DWARNX("%s: PUFFS_IO_APPEND set, untested code", __func__);
+
                if ((error = perfuse_node_getattr(pu, opc, vap, pcr)) != 0)
                        goto out;
 
@@ -2901,6 +2963,12 @@
 
        pm = NULL;
 
+#ifdef PERFUSE_DEBUG
+       if (perfuse_diagflags & PDF_RESIZE)
+               DPRINTF(">> %s %p %lld \n", __func__,
+                       (void *)opc, vap->va_size);
+#endif
+
        do {
                size_t max_write;
                /*
@@ -2967,12 +3035,32 @@
        if (*resid != 0)
                error = EFBIG;
 
+#ifdef PERFUSE_DEBUG
+       if (perfuse_diagflags & PDF_RESIZE) {
+               if (offset > (off_t)vap->va_size)
+                       DPRINTF("<< %s %p %lld -> %lld\n", __func__, 
+                               (void *)opc, vap->va_size, offset);
+               else
+                       DPRINTF("<< %s %p \n", __func__, (void *)opc);
+       }
+#endif
+
        /*
         * Update file size if we wrote beyond the end
         */
        if (offset > (off_t)vap->va_size) 
                vap->va_size = offset;
 
+       if (inresize) {
+#ifdef PERFUSE_DEBUG
+               if (!(pnd->pnd_flags & PND_INRESIZE))
+                       DERRX(EX_SOFTWARE, "file write grow without resize");
+#endif
+               pnd->pnd_flags &= ~PND_INRESIZE;
+               (void)dequeue_requests(ps, opc, PCQ_RESIZE, DEQUEUE_ALL);
+       }
+
+
        /*
         * Statistics
         */
diff -r 55ec8ebbf996 -r a04dbb880878 lib/libperfuse/perfuse_if.h
--- a/lib/libperfuse/perfuse_if.h       Fri Sep 09 15:35:22 2011 +0000
+++ b/lib/libperfuse/perfuse_if.h       Fri Sep 09 15:45:28 2011 +0000
@@ -1,4 +1,4 @@
-/*  $NetBSD: perfuse_if.h,v 1.15 2011/08/14 08:19:04 christos Exp $ */
+/*  $NetBSD: perfuse_if.h,v 1.16 2011/09/09 15:45:28 manu Exp $ */
 
 /*-
  *  Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
@@ -40,18 +40,19 @@
  * Diagnostic flags. This global is used only for DPRINTF/DERR/DWARN



Home | Main Index | Thread Index | Old Index