Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/external/bsd/atf/dist/atf-run Pull up upstream revision 648e...



details:   https://anonhg.NetBSD.org/src/rev/761caa3e6e22
branches:  trunk
changeset: 763666:761caa3e6e22
user:      jmmv <jmmv%NetBSD.org@localhost>
date:      Wed Mar 30 11:10:56 2011 +0000

description:
Pull up upstream revision 648ed6360b2b7cda81a6079b00dc436d09c745b8:

Retry calls that raise file system errors during cleanup

If a test case mounts user-space (puffs/fuse) file systems or spawns
server processes that create pid files, the termination of the
corresponding processes does not guarantee that the file system is
left in a consistent state immediately.  The cleanup routines of both
components (file systems and daemons) may still be running.

This situation causes a race condition between the termination of the
auxiliary processes and our own file system cleanup: the file system
calls performed from within the cleanup routine may raise errors
because the file system is still changing underneath.  (E.g. we first
enumerate the contents of a directory and get file X, but when we
attempt to delete file X, it may be gone.)

Deal with this by retrying failing file system calls a few times and
ignoring "expected" errors before giving up.

diffstat:

 external/bsd/atf/dist/atf-run/fs.cpp |  92 ++++++++++++++++++++++++++++-------
 1 files changed, 72 insertions(+), 20 deletions(-)

diffs (143 lines):

diff -r bb14c6a4ffd9 -r 761caa3e6e22 external/bsd/atf/dist/atf-run/fs.cpp
--- a/external/bsd/atf/dist/atf-run/fs.cpp      Wed Mar 30 10:32:35 2011 +0000
+++ b/external/bsd/atf/dist/atf-run/fs.cpp      Wed Mar 30 11:10:56 2011 +0000
@@ -1,7 +1,7 @@
 //
 // Automated Testing Framework (atf)
 //
-// Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
+// Copyright (c) 2007, 2008, 2009, 2011 The NetBSD Foundation, Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -45,6 +45,7 @@
 #include <cstring>
 
 #include "atf-c++/detail/process.hpp"
+#include "atf-c++/detail/sanity.hpp"
 
 #include "fs.hpp"
 #include "user.hpp"
@@ -61,6 +62,19 @@
                             bool);
 static void do_unmount(const atf::fs::path&);
 
+// The cleanup routines below are tricky: they are executed immediately after
+// a test case's death, and after we have forcibly killed any stale processes.
+// However, even if the processes are dead, this does not mean that the file
+// system we are scanning is stable.  In particular, if the test case has
+// mounted file systems through fuse/puffs, the fact that the processes died
+// does not mean that the file system is truly unmounted.
+//
+// The code below attempts to cope with this by catching errors and either
+// ignoring them or retrying the actions on the same file/directory a few times
+// before giving up.
+static const int max_retries = 5;
+static const int retry_delay_in_seconds = 1;
+
 // The erase parameter in this routine is to control nested mount points.
 // We want to descend into a mount point to unmount anything that is
 // mounted under it, but we do not want to delete any files while doing
@@ -70,19 +84,24 @@
 void
 cleanup_aux(const atf::fs::path& p, dev_t parent_device, bool erase)
 {
-    atf::fs::file_info fi(p);
+    try {
+        atf::fs::file_info fi(p);
 
-    if (fi.get_type() == atf::fs::file_info::dir_type)
-        cleanup_aux_dir(p, fi, fi.get_device() == parent_device);
+        if (fi.get_type() == atf::fs::file_info::dir_type)
+            cleanup_aux_dir(p, fi, fi.get_device() == parent_device);
+
+        if (fi.get_device() != parent_device)
+            do_unmount(p);
 
-    if (fi.get_device() != parent_device)
-        do_unmount(p);
-
-    if (erase) {
-        if (fi.get_type() == atf::fs::file_info::dir_type)
-            atf::fs::rmdir(p);
-        else
-            atf::fs::remove(p);
+        if (erase) {
+            if (fi.get_type() == atf::fs::file_info::dir_type)
+                atf::fs::rmdir(p);
+            else
+                atf::fs::remove(p);
+        }
+    } catch (const atf::system_error& e) {
+        if (e.code() != ENOENT && e.code() != ENOTDIR)
+            throw e;
     }
 }
 
@@ -92,15 +111,39 @@
                 bool erase)
 {
     if (erase && ((fi.get_mode() & S_IRWXU) != S_IRWXU)) {
-        if (chmod(p.c_str(), fi.get_mode() | S_IRWXU) == -1)
-            throw atf::system_error(IMPL_NAME "::cleanup(" +
-                                    p.str() + ")", "chmod(2) failed", errno);
+        int retries = max_retries;
+retry_chmod:
+        if (chmod(p.c_str(), fi.get_mode() | S_IRWXU) == -1) {
+            if (retries > 0) {
+                retries--;
+                ::sleep(retry_delay_in_seconds);
+                goto retry_chmod;
+            } else {
+                throw atf::system_error(IMPL_NAME "::cleanup(" +
+                                        p.str() + ")", "chmod(2) failed",
+                                        errno);
+            }
+        }
     }
 
     std::set< std::string > subdirs;
     {
-        const atf::fs::directory d(p);
-        subdirs = d.names();
+        bool ok = false;
+        int retries = max_retries;
+        while (!ok) {
+            INV(retries > 0);
+            try {
+                const atf::fs::directory d(p);
+                subdirs = d.names();
+                ok = true;
+            } catch (const atf::system_error& e) {
+                if (retries == 0)
+                    throw e;
+                retries--;
+                ::sleep(retry_delay_in_seconds);
+            }
+        }
+        INV(ok);
     }
 
     for (std::set< std::string >::const_iterator iter = subdirs.begin();
@@ -122,9 +165,18 @@
         in_path : in_path.to_absolute();
 
 #if defined(HAVE_UNMOUNT)
-    if (unmount(abs_path.c_str(), 0) == -1)
-        throw atf::system_error(IMPL_NAME "::cleanup(" + in_path.str() + ")",
-                                "unmount(2) failed", errno);
+    int retries = max_retries;
+retry_unmount:
+    if (unmount(abs_path.c_str(), 0) == -1) {
+        if (errno == EBUSY && retries > 0) {
+            retries--;
+            ::sleep(retry_delay_in_seconds);
+            goto retry_unmount;
+        } else {
+            throw atf::system_error(IMPL_NAME "::cleanup(" + in_path.str() +
+                                    ")", "unmount(2) failed", errno);
+        }
+    }
 #else
     // We could use umount(2) instead if it was available... but
     // trying to do so under, e.g. Linux, is a nightmare because we



Home | Main Index | Thread Index | Old Index