Subject: SOCK_SEQPACKET UNIX domain socket patch
To: None <tech-kern@netbsd.org>
From: Jesse Off <jesseoff@yahoo.com>
List: tech-kern
Date: 04/14/2003 22:39:35
--0-1079248405-1050385175=:11524
Content-Type: text/plain; charset=us-ascii
Content-Id: 
Content-Disposition: inline

Attached is a diff against 1.6.1 for support of
SOCK_SEQPACKET UNIX domain sockets.  Currently in
NetBSD SOCK_DGRAM sockets are not flow controlled and
once the receive buffer is full, writes return EINVAL
(or ENOBUFS, as of a few days ago).  Furthermore, there
is no way for a process to block until the condition
passes; i.e., select() won't work and the end user ends
up having to sleep and retry.  A SOCK_SEQPACKET socket
behaves a little like a SOCK_STREAM in that it is flow
controlled and connection oriented, but the
read/writes have the semantics of a SOCK_DGRAM.

In this patch, I've also changed the way backpressure
is maintained on SOCK_STREAM/SOCK_SEQPACKET.  In the
current source tree, writes are passed immediately to
the connected socket's receive buffer, and the sending
socket's send buffer size is reduced.  This causes
problems for a packet oriented connection, since
reducing the send buffer size could cause an EMSGSIZE
error when the client socket's receive buffer is
almost full.  The alternative is to increase the send
socket buffer's usage counts (sb_cc/sb_mbcnt) rather
than reduce its size limits (sb_hiwat/sb_mbmax), which
is what I do in the patch.

It would be nice to see this in the -current tree; I'm
actually using it against 1.6.1 without a problem. 
I'm using it to pass data received from udp to another
process without having to deal with framing and
escaping and buffering each variable-sized message.

//Jesse Off



__________________________________________________
Do you Yahoo!?
The New Yahoo! Search - Faster. Easier. Bingo
http://search.yahoo.com
--0-1079248405-1050385175=:11524
Content-Type: text/plain; name="seqpacket.diff"
Content-Description: seqpacket.diff
Content-Disposition: inline; filename="seqpacket.diff"

Index: kern/uipc_proto.c
===================================================================
RCS file: /cvsroot/src/sys/kern/uipc_proto.c,v
retrieving revision 1.13
diff -u -r1.13 uipc_proto.c
--- kern/uipc_proto.c	2001/11/12 15:25:32	1.13
+++ kern/uipc_proto.c	2003/04/14 15:57:23
@@ -66,6 +66,11 @@
   uipc_usrreq,
   0,		0,		0,		0,
 },
+{ SOCK_SEQPACKET,	&unixdomain,	0,	PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ATOMIC|PR_RIGHTS,
+  0,		0,		0,		uipc_ctloutput,
+  uipc_usrreq,
+  0,		0,		0,		0,
+},
 { 0,		0,		0,		0,
   raw_input,	0,		raw_ctlinput,	0,
   raw_usrreq,
Index: kern/uipc_usrreq.c
===================================================================
RCS file: /cvsroot/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.53
diff -u -r1.53 uipc_usrreq.c
--- kern/uipc_usrreq.c	2001/11/12 15:25:34	1.53
+++ kern/uipc_usrreq.c	2003/04/14 15:57:25
@@ -96,7 +96,7 @@
  * Unix communications domain.
  *
  * TODO:
- *	SEQPACKET, RDM
+ *	RDM
  *	rethink name space problems
  *	need a proper out-of-band
  */
@@ -242,6 +242,7 @@
 			/*NOTREACHED*/
 
 		case SOCK_STREAM:
+		case SOCK_SEQPACKET:
 #define	rcv (&so->so_rcv)
 #define snd (&so2->so_snd)
 			if (unp->unp_conn == 0)
@@ -251,10 +252,8 @@
 			 * Adjust backpressure on sender
 			 * and wakeup any waiting to write.
 			 */
-			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
-			unp->unp_mbcnt = rcv->sb_mbcnt;
-			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
-			unp->unp_cc = rcv->sb_cc;
+			snd->sb_cc = rcv->sb_cc;
+			snd->sb_mbcnt = rcv->sb_mbcnt;
 			sowwakeup(so2);
 #undef snd
 #undef rcv
@@ -301,6 +300,7 @@
 			break;
 		}
 
+		case SOCK_SEQPACKET:
 		case SOCK_STREAM:
 #define	rcv (&so2->so_rcv)
 #define	snd (&so->so_snd)
@@ -310,7 +310,7 @@
 			if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
 				/*
 				 * Credentials are passed only once on
-				 * SOCK_STREAM.
+				 * SOCK_STREAM and SOCK_SEQPACKET.
 				 */
 				unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
 				control = unp_addsockcred(p, control);
@@ -323,13 +323,12 @@
 			if (control) {
 				if (sbappendcontrol(rcv, m, control) == 0)
 					m_freem(control);
-			} else
+			} else if (so->so_type == SOCK_SEQPACKET) 
+				sbappendrecord(rcv, m);
+			else 
 				sbappend(rcv, m);
-			snd->sb_mbmax -=
-			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
-			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
-			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
-			unp->unp_conn->unp_cc = rcv->sb_cc;
+			snd->sb_cc = rcv->sb_cc;
+			snd->sb_mbcnt = rcv->sb_mbcnt;
 			sorwakeup(so2);
 #undef snd
 #undef rcv
@@ -494,6 +493,7 @@
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		switch (so->so_type) {
 
+		case SOCK_SEQPACKET:
 		case SOCK_STREAM:
 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
 			break;
@@ -535,6 +535,12 @@
 		unp_drop(unp->unp_refs, ECONNRESET);
 	soisdisconnected(unp->unp_socket);
 	unp->unp_socket->so_pcb = 0;
+	/*
+	 * The send buffer never has any data in it since all sent data
+	 * is immediately transferred to the client socket's receive buffer.
+	 */ 
+	unp->unp_socket->so_snd.sb_cc = 0;
+	unp->unp_socket->so_snd.sb_mbcnt = 0;
 	if (unp->unp_addr)
 		free(unp->unp_addr, M_SONAME);
 	if (unp_rights) {
@@ -706,6 +712,7 @@
 		soisconnected(so);
 		break;
 
+	case SOCK_SEQPACKET:
 	case SOCK_STREAM:
 		unp2->unp_conn = unp;
 		soisconnected(so);
@@ -747,6 +754,7 @@
 		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
 		break;
 
+	case SOCK_SEQPACKET:
 	case SOCK_STREAM:
 		soisdisconnected(unp->unp_socket);
 		unp2->unp_conn = 0;
Index: sys/unpcb.h
===================================================================
RCS file: /cvsroot/src/sys/sys/unpcb.h,v
retrieving revision 1.11
diff -u -r1.11 unpcb.h
--- sys/unpcb.h	1998/01/07 22:49:47	1.11
+++ sys/unpcb.h	2003/04/14 15:57:51
@@ -78,8 +78,6 @@
 	struct 	unpcb *unp_nextref;	/* link in unp_refs list */
 	struct	sockaddr_un *unp_addr;	/* bound address of socket */
 	size_t	unp_addrlen;		/* size of socket address */
-	int	unp_cc;			/* copy of rcv.sb_cc */
-	int	unp_mbcnt;		/* copy of rcv.sb_mbcnt */
 	struct	timespec unp_ctime;	/* holds creation time */
 	int	unp_flags;		/* misc flags; see below*/
 };

--0-1079248405-1050385175=:11524--