tech-kern archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

SOCK_SEQPACKT for Unix sockets



Hi

I am trying to revive a  years old submission that implemented
SOCK_SEQPACKET for Unix sockets. Here is the original patch:
http://mail-index.netbsd.org/tech-kern/2003/04/14/0006.html

Attached is my patch against -current

I do not really know what I am doing here and therefore seek comments.
The initial patch contained some code that "changed the way backpressure
is maintained on SOCK_STREAM/SOCK_SEQPACKET". I removed it since it
also touched SOCK_STREAM, but that may be a bad choice, I would like to 
hear about that.

Also, I added code to handle SOCK_SEQPACKET at 3 places where it was not
done in the original patch, with the assumption that SOCK_SECPACKET should
really behave just like SOCK_STREAM? except for atomicity. Here are the
3 locations, I am not sure it was really relevant:
  unp_attach()
  uipc_usrreq()/PRU_SENSE
  unp_shutdown()

For the test case, I modified libperfuse/perfused to use SOCK_SEQPACKET
instead of SOCK_STREAM and it seems to work fine. That is supposed to 
fix the bug where the kernel sends more data than perfused can handle 
just in time, causing packet loss in the FUSE socket if SOCK_DGRAM is used. 
The result is a vnode stuck locked in kernel because the operation was 
sent but never got a reply. I have not been able to build a test case 
for that bug since I have trouble to reproduce it at will for now.

-- 
Emmanuel Dreyfus
manu%netbsd.org@localhost
--- src/sys/kern/uipc_proto.c.orig      2008-04-27 15:16:58.000000000 +0200
+++ src/sys/kern/uipc_proto.c   2011-05-23 15:24:33.000000000 +0200
@@ -64,8 +64,15 @@
                .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS,
                .pr_ctloutput = uipc_ctloutput,
                .pr_usrreq = uipc_usrreq,
        }, {
+               .pr_type = SOCK_SEQPACKET,
+               .pr_domain = &unixdomain,
+               .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS|PR_LISTEN|
+                           PR_ATOMIC,
+               .pr_ctloutput = uipc_ctloutput,
+               .pr_usrreq = uipc_usrreq,
+       }, {
                .pr_input = raw_input,
                .pr_ctlinput = raw_ctlinput,
                .pr_usrreq = raw_usrreq,
                .pr_init = raw_init,
--- src/sys/kern/uipc_usrreq.c.orig     2010-11-20 01:53:20.000000000 +0100
+++ src/sys/kern/uipc_usrreq.c  2011-05-23 15:32:58.000000000 +0200
@@ -123,9 +123,9 @@
 /*
  * Unix communications domain.
  *
  * TODO:
- *     SEQPACKET, RDM
+ *     RDM
  *     rethink name space problems
  *     need a proper out-of-band
  *
  * Notes on locking:
@@ -486,8 +486,9 @@
                case SOCK_DGRAM:
                        panic("uipc 1");
                        /*NOTREACHED*/
 
+               case SOCK_SEQPACKET: /* FALLTHROUGH */
                case SOCK_STREAM:
 #define        rcv (&so->so_rcv)
 #define snd (&so2->so_snd)
                        if (unp->unp_conn == 0)
@@ -566,8 +567,9 @@
                                unp_disconnect(unp);
                        break;
                }
 
+               case SOCK_SEQPACKET: /* FALLTHROUGH */
                case SOCK_STREAM:
 #define        rcv (&so2->so_rcv)
 #define        snd (&so->so_snd)
                        if (unp->unp_conn == NULL) {
@@ -578,9 +580,9 @@
                        KASSERT(solocked2(so, so2));
                        if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
                                /*
                                 * Credentials are passed only once on
-                                * SOCK_STREAM.
+                                * SOCK_STREAM and SOCK_SEQPACKET.
                                 */
                                unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
                                control = unp_addsockcred(l, control);
                        }
@@ -591,10 +593,21 @@
                         */
                        if (control) {
                                if (sbappendcontrol(rcv, m, control) != 0)
                                        control = NULL;
-                       } else
-                               sbappend(rcv, m);
+                       } else {
+                               switch(so->so_type) {
+                               case SOCK_SEQPACKET:
+                                       sbappendrecord(rcv, m);
+                                       break;
+                               case SOCK_STREAM:
+                                       sbappend(rcv, m);
+                                       break;
+                               default:
+                                       panic("uipc_usrreq");
+                                       break;
+                               }
+                       }
                        snd->sb_mbmax -=
                            rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
                        unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
                        newhiwat = snd->sb_hiwat -
@@ -628,12 +641,20 @@
                break;
 
        case PRU_SENSE:
                ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
-               if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
+               switch (so->so_type) {
+               case SOCK_SEQPACKET: /* FALLTHROUGH */
+               case SOCK_STREAM:
+                       if (unp->unp_conn == 0) 
+                               break;
+
                        so2 = unp->unp_conn->unp_socket;
                        KASSERT(solocked2(so, so2));
                        ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
+                       break;
+               default:
+                       break;
                }
                ((struct stat *) m)->st_dev = NODEV;
                if (unp->unp_ino == 0)
                        unp->unp_ino = unp_ino++;
@@ -766,8 +787,9 @@
        struct unpcb *unp;
        int error;
 
        switch (so->so_type) {
+       case SOCK_SEQPACKET: /* FALLTHROUGH */
        case SOCK_STREAM:
                if (so->so_lock == NULL) {
                        /* 
                         * XXX Assuming that no socket locks are held,
@@ -1092,8 +1114,9 @@
                unp2->unp_refs = unp;
                soisconnected(so);
                break;
 
+       case SOCK_SEQPACKET: /* FALLTHROUGH */
        case SOCK_STREAM:
                unp2->unp_conn = unp;
                if (req == PRU_CONNECT &&
                    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
@@ -1149,8 +1172,9 @@
                unp->unp_nextref = 0;
                so->so_state &= ~SS_ISCONNECTED;
                break;
 
+       case SOCK_SEQPACKET: /* FALLTHROUGH */
        case SOCK_STREAM:
                KASSERT(solocked2(so, unp2->unp_socket));
                soisdisconnected(so);
                unp2->unp_conn = 0;
@@ -1170,11 +1194,17 @@
 unp_shutdown(struct unpcb *unp)
 {
        struct socket *so;
 
-       if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
-           (so = unp->unp_conn->unp_socket))
-               socantrcvmore(so);
+       switch(unp->unp_socket->so_type) {
+       case SOCK_SEQPACKET: /* FALLTHROUGH */
+       case SOCK_STREAM:
+               if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
+                       socantrcvmore(so);
+               break;
+       default:
+               break;
+       }
 }
 
 bool
 unp_drop(struct unpcb *unp, int errno)


Home | Main Index | Thread Index | Old Index