Subject: Re: Fix for KAME PF_KEY DUMP-truncation marshalling bug
To: Jason Thorpe <thorpej@wasabisystems.com>
From: Jonathan Stone <jonathan@dsg.stanford.edu>
List: tech-net
Date: 05/28/2004 16:45:49
.. And here's a version which allows arbitrarily deep (up to mbuf
supply) ACQUIREs. Well, really all KEY_SENDUP_REGISTERED messages.

This is proof-of-concept/debugging; I'd probably add the extra
explicit argument to key_sendup(), as noted, before committing.
The code should also grab the actual limit on little mbufs, instead
of abusing NMBCLUSTERS.

It works for me, and allows abnormally deep ACQUIRE queues, without
the downsides of setting kern.sbmax (or kern.ipc.maxsockbuf, on FreeBSD 4.x).



Index: sys/netipsec/keysock.c
===================================================================
RCS file: /cvsroot/src/sys/netipsec/keysock.c,v
retrieving revision 1.4
diff -u -r1.4 keysock.c
--- keysock.c	26 Apr 2004 01:41:15 -0000	1.4
+++ keysock.c	28 May 2004 23:38:34 -0000
@@ -75,10 +75,19 @@
 static struct sockaddr key_dst = { 2, PF_KEY, };
 static struct sockaddr key_src = { 2, PF_KEY, };
 
-static int key_sendup0 __P((struct rawcb *, struct mbuf *, int));
+
+static int key_sendup0 __P((struct rawcb *, struct mbuf *, int, int));
 
 struct pfkeystat pfkeystat;
 
+int key_registered_sb_max = (NMBCLUSTERS * MHLEN); /* XXX arbitrary */
+
+/* XXX sysctl */
+#ifdef __FreeBSD__
+SYSCTL_INT(_net_key, OID_AUTO, registered_sbmax, CTLFLAG_RD, 
+    &key_registered_sb_max , 0, "Maximum kernel-to-user PFKEY datagram size");
+#endif
+
 /*
  * key_output()
  */
@@ -144,12 +153,13 @@
  * send message to the socket.
  */
 static int
-key_sendup0(rp, m, promisc)
+key_sendup0(rp, m, promisc, sbprio)
 	struct rawcb *rp;
 	struct mbuf *m;
 	int promisc;
 {
 	int error;
+	int ok;
 
 	if (promisc) {
 		struct sadb_msg *pmsg;
@@ -174,8 +184,14 @@
 		pfkeystat.in_msgtype[pmsg->sadb_msg_type]++;
 	}
 
-	if (!sbappendaddr(&rp->rcb_socket->so_rcv, (struct sockaddr *)&key_src,
-	    m, NULL)) {
+	if (sbprio == 0)
+		ok = sbappendaddr(&rp->rcb_socket->so_rcv,
+			       (struct sockaddr *)&key_src, m, NULL);
+	else
+		ok = sbappendaddrchain(&rp->rcb_socket->so_rcv,
+			       (struct sockaddr *)&key_src, m, sbprio);
+
+	  if (!ok) {
 		pfkeystat.in_nomem++;
 		m_freem(m);
 		error = ENOBUFS;
@@ -272,7 +288,7 @@
 
 /* so can be NULL if target != KEY_SENDUP_ONE */
 int
-key_sendup_mbuf(so, m, target)
+key_sendup_mbuf(so, m, target /*, sbprio */)
 	struct socket *so;
 	struct mbuf *m;
 	int target;
@@ -282,11 +298,27 @@
 	int sendup;
 	struct rawcb *rp;
 	int error = 0;
+	int sbprio = 0; /* XXX should be a parameter */
 
 	if (m == NULL)
 		panic("key_sendup_mbuf: NULL pointer was passed.\n");
 	if (so == NULL && target == KEY_SENDUP_ONE)
 		panic("key_sendup_mbuf: NULL pointer was passed.\n");
+	
+	/*
+	 * RFC 2367 says ACQUIRE and other kernel-generated messages
+	 * are special. We treat all KEY_SENDUP_REGISTERED messages
+	 * as special, delivering them to all registered sockets
+	 * even if the socket is at or above its so->so_rcv.sb_max limits.
+	 * The only constraint is that the  so_rcv data fall below
+	 * key_registered_sb_max.
+	 * Doing that check here avoids reworking every key_sendup_mbuf()
+	 * in the short term. . The rework will be done after a technical
+	 * conensus that this approach is appropriate.
+ 	 */
+	if (target == KEY_SENDUP_REGISTERED) {
+		sbprio = SB_PRIO_BESTEFFORT;
+	}
 
 	pfkeystat.in_total++;
 	pfkeystat.in_bytes += m->m_pkthdr.len;
@@ -309,6 +341,7 @@
 
 	LIST_FOREACH(rp, &rawcb_list, rcb_list)
 	{
+		struct socket * kso = rp->rcb_socket;
 		if (rp->rcb_proto.sp_family != PF_KEY)
 			continue;
 		if (rp->rcb_proto.sp_protocol
@@ -325,7 +358,7 @@
 		 */
 		if (((struct keycb *)rp)->kp_promisc) {
 			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
-				(void)key_sendup0(rp, n, 1);
+				(void)key_sendup0(rp, n, 1, 0);
 				n = NULL;
 			}
 		}
@@ -345,8 +378,16 @@
 			sendup++;
 			break;
 		case KEY_SENDUP_REGISTERED:
-			if (kp->kp_registered)
-				sendup++;
+			if (kp->kp_registered) {
+				if (kso->so_rcv.sb_cc <= key_registered_sb_max)
+					sendup++;
+			  	else
+			  		printf("keysock: "
+					       "registered sendup dropped, "
+					       "sb_cc %ld max %d\n",
+					       kso->so_rcv.sb_cc,
+					       key_registered_sb_max);
+			}
 			break;
 		}
 		pfkeystat.in_msgtarget[target]++;
@@ -360,7 +401,7 @@
 			return ENOBUFS;
 		}
 
-		if ((error = key_sendup0(rp, n, 0)) != 0) {
+		if ((error = key_sendup0(rp, n, 0, 0)) != 0) {
 			m_freem(m);
 			return error;
 		}
@@ -368,8 +409,9 @@
 		n = NULL;
 	}
 
+	/* The 'later' time for processing the exact target has arrived */
 	if (so) {
-		error = key_sendup0(sotorawcb(so), m, 0);
+		error = key_sendup0(sotorawcb(so), m, 0, sbprio);
 		m = NULL;
 	} else {
 		error = 0;
Index: sys/netipsec/keysock.h
===================================================================
RCS file: /cvsroot/src/sys/netipsec/keysock.h,v
retrieving revision 1.2
diff -u -r1.2 keysock.h
--- keysock.h	4 Dec 2003 19:38:25 -0000	1.2
+++ keysock.h	28 May 2004 23:38:34 -0000
@@ -82,7 +82,7 @@
 #endif
 
 extern int key_sendup __P((struct socket *, struct sadb_msg *, u_int, int));
-extern int key_sendup_mbuf __P((struct socket *, struct mbuf *, int));
+extern int key_sendup_mbuf __P((struct socket *, struct mbuf *, int /*,int*/));
 #endif /* _KERNEL */
 
 #endif /*_NETIPSEC_KEYSOCK_H_*/