Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/lib/librumpclient Allow to set a timeout for how long the cl...



details:   https://anonhg.NetBSD.org/src/rev/7756dfa001a6
branches:  trunk
changeset: 761442:7756dfa001a6
user:      pooka <pooka%NetBSD.org@localhost>
date:      Thu Jan 27 18:04:05 2011 +0000

description:
Allow to set a timeout for how long the client will attempt to
reconnect in case the connection to the server is lost.  Default
to exactly one reattempt.  This makes sense and additionally fixes
the dev/raidframe/smalldisk test which currently causes a server
panic when a certain raidctl command is run (without this fix the
test would timeout since the client kept attempting to reconnect).

diffstat:

 lib/librumpclient/rumpclient.c |  155 ++++++++++++++++++++++++++--------------
 lib/librumpclient/rumpclient.h |    6 +-
 2 files changed, 104 insertions(+), 57 deletions(-)

diffs (truncated from 302 to 300 lines):

diff -r 173534eb33b8 -r 7756dfa001a6 lib/librumpclient/rumpclient.c
--- a/lib/librumpclient/rumpclient.c    Thu Jan 27 17:38:04 2011 +0000
+++ b/lib/librumpclient/rumpclient.c    Thu Jan 27 18:04:05 2011 +0000
@@ -1,4 +1,4 @@
-/*      $NetBSD: rumpclient.c,v 1.19 2011/01/26 14:42:41 pooka Exp $   */
+/*      $NetBSD: rumpclient.c,v 1.20 2011/01/27 18:04:05 pooka Exp $   */
 
 /*
  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
@@ -82,27 +82,83 @@
 static int kq = -1;
 static sigset_t fullset;
 
-static int doconnect(int);
+static int doconnect(bool);
 static int handshake_req(struct spclient *, uint32_t *, int, bool);
 
-int didrecon;
+time_t retrytimo = RUMPCLIENT_RETRYCONN_ONCE;
 
 static int
 send_with_recon(struct spclient *spc, const void *data, size_t dlen)
 {
+       struct timeval starttime, curtime;
+       time_t prevreconmsg;
+       unsigned reconretries;
        int rv;
 
-       do {
+       for (prevreconmsg = 0, reconretries = 0;;) {
                rv = dosend(spc, data, dlen);
                if (__predict_false(rv == ENOTCONN || rv == EBADF)) {
-                       if ((rv = doconnect(1)) != 0)
+                       /* no persistent connections */
+                       if (retrytimo == 0)
+                               break;
+
+                       if (!prevreconmsg) {
+                               prevreconmsg = time(NULL);
+                               gettimeofday(&starttime, NULL);
+                       }
+                       if (reconretries == 1) {
+                               if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) {
+                                       rv = ENOTCONN;
+                                       break;
+                               }
+                               fprintf(stderr, "rump_sp: connection to "
+                                   "kernel lost, trying to reconnect ...\n");
+                       } else if (time(NULL) - prevreconmsg > 120) {
+                               fprintf(stderr, "rump_sp: still trying to "
+                                   "reconnect ...\n");
+                               prevreconmsg = time(NULL);
+                       }
+
+                       /* check that we aren't over the limit */
+                       if (retrytimo > 0) {
+                               struct timeval tmp;
+
+                               gettimeofday(&curtime, NULL);
+                               timersub(&curtime, &starttime, &tmp);
+                               if (tmp.tv_sec >= retrytimo) {
+                                       fprintf(stderr, "rump_sp: reconnect "
+                                           "failed, %lld second timeout\n",
+                                           (long long)retrytimo);
+                                       return ENOTCONN;
+                               }
+                       }
+
+                       /* adhoc backoff timer */
+                       if (reconretries < 10) {
+                               usleep(100000 * reconretries);
+                       } else {
+                               sleep(MIN(10, reconretries-9));
+                       }
+                       reconretries++;
+
+                       if ((rv = doconnect(false)) != 0)
                                continue;
                        if ((rv = handshake_req(&clispc, NULL, 0, true)) != 0)
                                continue;
-                       rv = ENOTCONN;
+
+                       /*
+                        * ok, reconnect succesful.  we need to return to
+                        * the upper layer to get the entire PDU resent.
+                        */
+                       if (reconretries != 1)
+                               fprintf(stderr, "rump_sp: reconnected!\n");
+                       rv = EAGAIN;
+                       break;
+               } else {
+                       _DIAGASSERT(errno != EAGAIN);
                        break;
                }
-       } while (__predict_false(rv != 0));
+       }
 
        return rv;
 }
@@ -235,7 +291,9 @@
                }
 
                rv = cliwaitresp(spc, &rw, &omask, false);
-       } while (rv == ENOTCONN || rv == EAGAIN);
+               if (rv == ENOTCONN)
+                       rv = EAGAIN;
+       } while (rv == EAGAIN);
        pthread_sigmask(SIG_SETMASK, &omask, NULL);
 
        *resp = rw.rw_data;
@@ -316,7 +374,9 @@
                }
 
                rv = cliwaitresp(spc, &rw, &omask, false);
-       } while (rv == ENOTCONN || rv == EAGAIN);
+               if (rv == ENOTCONN)
+                       rv = EAGAIN;
+       } while (rv == EAGAIN);
        pthread_sigmask(SIG_SETMASK, &omask, NULL);
 
        *resp = rw.rw_data;
@@ -474,10 +534,8 @@
 static struct sockaddr *serv_sa;
 
 static int
-doconnect(int retry)
+doconnect(bool noisy)
 {
-       time_t prevreconmsg;
-       unsigned reconretries;
        struct respwait rw;
        struct rsp_hdr rhdr;
        struct kevent kev[NSIG+1];
@@ -491,16 +549,9 @@
        kq = -1;
        s = -1;
 
-       prevreconmsg = 0;
-       reconretries = 0;
-
- again:
        if (clispc.spc_fd != -1)
                host_close(clispc.spc_fd);
        clispc.spc_fd = -1;
-       if (s != -1)
-               close(s);
-       s = -1;
 
        /*
         * for reconnect, gate everyone out of the receiver code
@@ -538,50 +589,33 @@
        while (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) {
                if (errno == EINTR)
                        continue;
-               if (!retry) {
-                       error = errno;
+               error = errno;
+               if (noisy)
                        fprintf(stderr, "rump_sp: client connect failed: %s\n",
                            strerror(errno));
-                       errno = error;
-                       return -1;
-               }
-
-               if (prevreconmsg == 0) {
-                       fprintf(stderr, "rump_sp: connection to kernel lost, "
-                           "trying to reconnect ...\n");
-                       prevreconmsg = time(NULL);
-               }
-               if (time(NULL) - prevreconmsg > 120) {
-                       fprintf(stderr, "rump_sp: still trying to "
-                           "reconnect ...\n");
-                       prevreconmsg = time(NULL);
-               }
-
-               /* adhoc backoff timer */
-               if (reconretries++ < 10) {
-                       usleep(100000 * reconretries);
-               } else {
-                       sleep(MIN(10, reconretries-9));
-               }
-               goto again;
+               errno = error;
+               return -1;
        }
 
        if ((error = parsetab[ptab_idx].connhook(s)) != 0) {
                error = errno;
-               fprintf(stderr, "rump_sp: connect hook failed\n");
+               if (noisy)
+                       fprintf(stderr, "rump_sp: connect hook failed\n");
                errno = error;
                return -1;
        }
 
        if ((n = host_read(s, banner, sizeof(banner)-1)) < 0) {
                error = errno;
-               fprintf(stderr, "rump_sp: failed to read banner\n");
+               if (noisy)
+                       fprintf(stderr, "rump_sp: failed to read banner\n");
                errno = error;
                return -1;
        }
 
        if (banner[n-1] != '\n') {
-               fprintf(stderr, "rump_sp: invalid banner\n");
+               if (noisy)
+                       fprintf(stderr, "rump_sp: invalid banner\n");
                errno = EINVAL;
                return -1;
        }
@@ -590,8 +624,9 @@
 
        flags = host_fcntl(s, F_GETFL, 0);
        if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) {
-               fprintf(stderr, "rump_sp: socket fd NONBLOCK: %s\n",
-                   strerror(errno));
+               if (noisy)
+                       fprintf(stderr, "rump_sp: socket fd NONBLOCK: %s\n",
+                           strerror(errno));
                errno = EINVAL;
                return -1;
        }
@@ -599,14 +634,11 @@
        clispc.spc_state = SPCSTATE_RUNNING;
        clispc.spc_reconnecting = 0;
 
-       if (prevreconmsg) {
-               fprintf(stderr, "rump_sp: reconnected!\n");
-       }
-
        /* setup kqueue, we want all signals and the fd */
        if ((kq = host_kqueue()) == -1) {
                error = errno;
-               fprintf(stderr, "rump_sp: cannot setup kqueue");
+               if (noisy)
+                       fprintf(stderr, "rump_sp: cannot setup kqueue");
                errno = error;
                return -1;
        }
@@ -618,7 +650,8 @@
            EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
        if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) {
                error = errno;
-               fprintf(stderr, "rump_sp: kevent() failed");
+               if (noisy)
+                       fprintf(stderr, "rump_sp: kevent() failed");
                errno = error;
                return -1;
        }
@@ -683,7 +716,7 @@
 
        if (doinit() == -1)
                return -1;
-       if (doconnect(0) == -1)
+       if (doconnect(true) == -1)
                return -1;
 
        error = handshake_req(&clispc, NULL, 0, false);
@@ -738,7 +771,7 @@
 
        if (doinit() == -1)
                return -1;
-       if (doconnect(1) == -1)
+       if (doconnect(false) == -1)
                return -1;
 
        error = handshake_req(&clispc, rpf->fork_auth, 0, false);
@@ -751,3 +784,13 @@
 
        return 0;
 }
+
+void
+rumpclient_setconnretry(time_t timeout)
+{
+
+       if (timeout < RUMPCLIENT_RETRYCONN_ONCE)
+               return; /* gigo */
+
+       retrytimo = timeout;
+}
diff -r 173534eb33b8 -r 7756dfa001a6 lib/librumpclient/rumpclient.h
--- a/lib/librumpclient/rumpclient.h    Thu Jan 27 17:38:04 2011 +0000
+++ b/lib/librumpclient/rumpclient.h    Thu Jan 27 18:04:05 2011 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: rumpclient.h,v 1.2 2011/01/05 17:14:50 pooka Exp $     */
+/*     $NetBSD: rumpclient.h,v 1.3 2011/01/27 18:04:05 pooka Exp $     */
 
 /*-
  * Copyright (c) 2010 Antti Kantee.  All Rights Reserved.
@@ -37,6 +37,10 @@
 struct rumpclient_fork *rumpclient_prefork(void);
 int                    rumpclient_fork_init(struct rumpclient_fork *);
 
+#define RUMPCLIENT_RETRYCONN_INFTIME ((time_t)-1)
+#define RUMPCLIENT_RETRYCONN_ONCE ((time_t)-2)
+void rumpclient_setconnretry(time_t);
+
 __END_DECLS



Home | Main Index | Thread Index | Old Index