Subject: IP_RECVIF addition
To: None <tech-net@NetBSD.ORG>
From: Bill Fenner <fenner@parc.xerox.com>
List: tech-net
Date: 10/18/1996 15:26:17
Hi,

  After talking with Van Jacobson about some problems I've been having
with mrouted on point-to-point links and IGMP memberships from mobile
nodes, we came to the conclusion that the reception interface should
be available as control information with recvmsg().  (In fact, Van was
surprised that it wasn't.)  I include diffs to FreeBSD to:

- make IP_RECVDSTADDR work for the multicast udp case (one of the more useful!)
- make IP_RECVDSTADDR work for raw sockets (absolutely useless)
- add IP_RECVIF, for udp and raw, which supplies an AF_LINK sockaddr.
- make IP_RECVOPTS and IP_RECVRETOPTS even more broken (probably not an issue)

  Due to the timestamping code, I presume this won't apply directly to
NetBSD, but I'd like feedback on the style & implementation since I want
it to be common to *BSD before I bother committing it to FreeBSD.  BSDI
has said that they'll support it as a patch to 3.0 if/when needed.

Thanks,
  Bill


*** kern/uipc_socket2.c.orig	Thu Sep 26 23:40:26 1996
--- kern/uipc_socket2.c	Fri Oct 11 06:56:41 1996
***************
*** 754,757 ****
--- 799,835 ----
  			m = mn;
  		} while (m);
  	}
+ }
+ 
+ /*
+  * Create a "control" mbuf containing the specified data
+  * with the specified type for presentation on a socket buffer.
+  *
+  * The size bytes at p are copied to CMSG_DATA() of a cmsghdr that is
+  * stamped with the given type and level.  Returns the new mbuf, or
+  * NULL if none could be allocated or if the header plus data would
+  * not fit within MLEN bytes.
+  */
+ struct mbuf *
+ sbcreatecontrol(p, size, type, level)
+ 	caddr_t p;
+ 	register int size;
+ 	int type, level;
+ {
+ 	register struct cmsghdr *cp;
+ 	struct mbuf *m;
+ 
+ 	/* Header plus data must fit in the one mbuf allocated below. */
+ 	if (size < 0 || size > (int)(MLEN - sizeof(*cp)))
+ 		return ((struct mbuf *) NULL);
+ 	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
+ 		return ((struct mbuf *) NULL);
+ 	cp = mtod(m, struct cmsghdr *);
+ 	(void)memcpy(CMSG_DATA(cp), p, size);
+ 	size += sizeof(*cp);
+ 	m->m_len = size;
+ 	cp->cmsg_len = size;
+ 	cp->cmsg_level = level;
+ 	cp->cmsg_type = type;
+ 	return (m);
  }
*** netinet/in.h.orig	Fri Sep 27 19:50:46 1996
--- netinet/in.h	Thu Sep 26 20:46:10 1996
***************
*** 210,215 ****
--- 210,216 ----
  #define IP_RSVP_VIF_ON		17   /* set RSVP per-vif socket */
  #define IP_RSVP_VIF_OFF		18   /* unset RSVP per-vif socket */
  #define IP_PORTRANGE		19   /* int; range to choose for unspec port */
+ #define	IP_RECVIF		20   /* bool; receive reception if w/dgram */
  
  /*
   * Defaults and limits for options
*** netinet/in_pcb.h.orig	Thu Sep 26 20:49:17 1996
--- netinet/in_pcb.h	Thu Sep 26 20:48:04 1996
***************
*** 76,85 ****
  #define	INP_RECVOPTS		0x01	/* receive incoming IP options */
  #define	INP_RECVRETOPTS		0x02	/* receive IP options for reply */
  #define	INP_RECVDSTADDR		0x04	/* receive IP dst address */
- #define	INP_CONTROLOPTS		(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR)
  #define	INP_HDRINCL		0x08	/* user supplies entire IP header */
  #define	INP_HIGHPORT		0x10	/* user wants "high" port binding */
  #define	INP_LOWPORT		0x20	/* user wants "low" port binding */
  
  #define	INPLOOKUP_WILDCARD	1
  
--- 76,87 ----
  #define	INP_RECVOPTS		0x01	/* receive incoming IP options */
  #define	INP_RECVRETOPTS		0x02	/* receive IP options for reply */
  #define	INP_RECVDSTADDR		0x04	/* receive IP dst address */
  #define	INP_HDRINCL		0x08	/* user supplies entire IP header */
  #define	INP_HIGHPORT		0x10	/* user wants "high" port binding */
  #define	INP_LOWPORT		0x20	/* user wants "low" port binding */
+ #define	INP_RECVIF		0x40	/* receive incoming interface */
+ #define	INP_CONTROLOPTS		(INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
+ 					INP_RECVIF)
  
  #define	INPLOOKUP_WILDCARD	1
  
*** netinet/ip_input.c.orig	Thu Sep 26 18:33:22 1996
--- netinet/ip_input.c	Thu Sep 26 23:44:35 1996
***************
*** 51,56 ****
--- 51,57 ----
  #include <sys/sysctl.h>
  
  #include <net/if.h>
+ #include <net/if_dl.h>
  #include <net/route.h>
  #include <net/netisr.h>
  
***************
*** 1234,1239 ****
--- 1235,1311 ----
  		break;
  	}
  	icmp_error(mcopy, type, code, dest, destifp);
+ }
+ 
+ /*
+  * Build the chain of control mbufs to accompany a received datagram.
+  *
+  * One control mbuf is created for each item the socket has asked for
+  * (SO_TIMESTAMP, IP_RECVDSTADDR, IP_RECVIF) and linked through m_next
+  * starting at *mp.  ip supplies the destination address and m the
+  * receiving interface.  An item whose mbuf cannot be allocated is
+  * silently left out.  IP_RECVOPTS and IP_RECVRETOPTS are not handled.
+  */
+ void
+ ip_savecontrol(inp, mp, ip, m)
+ 	register struct inpcb *inp;
+ 	register struct mbuf **mp;
+ 	register struct ip *ip;
+ 	register struct mbuf *m;
+ {
+ 	if (inp->inp_socket->so_options & SO_TIMESTAMP) {
+ 		struct timeval tv;
+ 
+ 		microtime(&tv);
+ 		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
+ 			SCM_TIMESTAMP, SOL_SOCKET);
+ 		if (*mp)
+ 			mp = &(*mp)->m_next;
+ 	}
+ 	if (inp->inp_flags & INP_RECVDSTADDR) {
+ 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
+ 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
+ 		if (*mp)
+ 			mp = &(*mp)->m_next;
+ 	}
+ #ifdef notyet
+ 	/* XXX
+ 	 * Moving these out of udp_input() made them even more broken
+ 	 * than they already were.
+ 	 */
+ 	/* options were tossed already */
+ 	if (inp->inp_flags & INP_RECVOPTS) {
+ 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
+ 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
+ 		if (*mp)
+ 			mp = &(*mp)->m_next;
+ 	}
+ 	/* ip_srcroute doesn't do what we want here, need to fix */
+ 	if (inp->inp_flags & INP_RECVRETOPTS) {
+ 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(),
+ 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
+ 		if (*mp)
+ 			mp = &(*mp)->m_next;
+ 	}
+ #endif
+ 	if (inp->inp_flags & INP_RECVIF) {
+ 		struct sockaddr_dl sdl;
+ 
+ 		/*
+ 		 * All sizeof(sdl) bytes are handed to sbcreatecontrol(), so
+ 		 * clear the whole structure first rather than pass along
+ 		 * stale stack contents; this also leaves sdl_nlen, sdl_alen
+ 		 * and sdl_slen zero.
+ 		 */
+ 		bzero((caddr_t) &sdl, sizeof(sdl));
+ 		sdl.sdl_len = sizeof(sdl);
+ 		sdl.sdl_family = AF_LINK;
+ 		sdl.sdl_index = m->m_pkthdr.rcvif ?
+ 			m->m_pkthdr.rcvif->if_index : 0;
+ 		*mp = sbcreatecontrol((caddr_t) &sdl, sdl.sdl_len,
+ 			IP_RECVIF, IPPROTO_IP);
+ 		if (*mp)
+ 			mp = &(*mp)->m_next;
+ 	}
  }
  
  int
*** netinet/ip_output.c.orig	Thu Sep 26 22:57:39 1996
--- netinet/ip_output.c	Thu Sep 26 22:58:51 1996
***************
*** 594,599 ****
--- 594,600 ----
  		case IP_RECVOPTS:
  		case IP_RECVRETOPTS:
  		case IP_RECVDSTADDR:
+ 		case IP_RECVIF:
  			if (m == 0 || m->m_len != sizeof(int))
  				error = EINVAL;
  			else {
***************
*** 624,629 ****
--- 625,634 ----
  				case IP_RECVDSTADDR:
  					OPTSET(INP_RECVDSTADDR);
  					break;
+ 
+ 				case IP_RECVIF:
+ 					OPTSET(INP_RECVIF);
+ 					break;
  				}
  			}
  			break;
***************
*** 694,699 ****
--- 699,705 ----
  		case IP_RECVOPTS:
  		case IP_RECVRETOPTS:
  		case IP_RECVDSTADDR:
+ 		case IP_RECVIF:
  			*mp = m = m_get(M_WAIT, MT_SOOPTS);
  			m->m_len = sizeof(int);
  			switch (optname) {
***************
*** 718,723 ****
--- 724,733 ----
  
  			case IP_RECVDSTADDR:
  				optval = OPTBIT(INP_RECVDSTADDR);
+ 				break;
+ 
+ 			case IP_RECVIF:
+ 				optval = OPTBIT(INP_RECVIF);
  				break;
  			}
  			*mtod(m, int *) = optval;
*** netinet/ip_var.h.orig	Fri Sep 27 19:49:42 1996
--- netinet/ip_var.h	Thu Sep 26 23:51:32 1996
***************
*** 171,176 ****
--- 171,178 ----
  			  struct ip_moptions *));
  int	 ip_output __P((struct mbuf *,
  	    struct mbuf *, struct route *, int, struct ip_moptions *));
+ void	 ip_savecontrol __P((struct inpcb *, struct mbuf **, struct ip *,
+ 		struct mbuf *));
  void	 ip_slowtimo __P((void));
  struct mbuf *
  	 ip_srcroute __P((void));
*** netinet/raw_ip.c.orig	Thu Sep 26 18:28:19 1996
--- netinet/raw_ip.c	Thu Sep 26 23:02:24 1996
***************
*** 106,112 ****
  {
  	register struct ip *ip = mtod(m, struct ip *);
  	register struct inpcb *inp;
! 	struct socket *last = 0;
  
  	ripsrc.sin_addr = ip->ip_src;
  	for (inp = ripcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
--- 106,113 ----
  {
  	register struct ip *ip = mtod(m, struct ip *);
  	register struct inpcb *inp;
! 	struct inpcb *last = 0;
! 	struct mbuf *opts = 0;
  
  	ripsrc.sin_addr = ip->ip_src;
  	for (inp = ripcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) {
***************
*** 121,143 ****
  		if (last) {
  			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
  			if (n) {
! 				if (sbappendaddr(&last->so_rcv,
  				    (struct sockaddr *)&ripsrc, n,
! 				    (struct mbuf *)0) == 0)
  					/* should notify about lost packet */
  					m_freem(n);
! 				else
! 					sorwakeup(last);
  			}
  		}
! 		last = inp->inp_socket;
  	}
  	if (last) {
! 		if (sbappendaddr(&last->so_rcv, (struct sockaddr *)&ripsrc,
! 		    m, (struct mbuf *)0) == 0)
  			m_freem(m);
! 		else
! 			sorwakeup(last);
  	} else {
  		m_freem(m);
                ipstat.ips_noproto++;
--- 122,155 ----
  		if (last) {
  			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
  			if (n) {
! 				if (last->inp_flags & INP_CONTROLOPTS ||
! 				    last->inp_socket->so_options & SO_TIMESTAMP)
! 				    ip_savecontrol(last, &opts, ip, n);
! 				if (sbappendaddr(&last->inp_socket->so_rcv,
  				    (struct sockaddr *)&ripsrc, n,
! 				    opts) == 0) {
  					/* should notify about lost packet */
  					m_freem(n);
! 					if (opts)
! 					    m_freem(opts);
! 				} else
! 					sorwakeup(last->inp_socket);
! 				opts = 0;
  			}
  		}
! 		last = inp;
  	}
  	if (last) {
! 		if (last->inp_flags & INP_CONTROLOPTS ||
! 		    last->inp_socket->so_options & SO_TIMESTAMP)
! 			ip_savecontrol(last, &opts, ip, m);
! 		if (sbappendaddr(&last->inp_socket->so_rcv,
! 		    (struct sockaddr *)&ripsrc, m, opts) == 0) {
  			m_freem(m);
! 			if (opts)
! 			    m_freem(opts);
! 		} else
! 			sorwakeup(last->inp_socket);
  	} else {
  		m_freem(m);
                ipstat.ips_noproto++;
*** netinet/udp_usrreq.c.orig	Thu Sep 26 18:33:27 1996
--- netinet/udp_usrreq.c	Fri Sep 27 20:03:44 1996
***************
*** 97,104 ****
  
  static	void udp_detach __P((struct inpcb *));
  static	void udp_notify __P((struct inpcb *, int));
- static	struct mbuf *udp_saveopt __P((caddr_t, int, int));
- static struct mbuf *udp_timestamp __P((void));
  
  void
  udp_init()
--- 97,102 ----
***************
*** 183,189 ****
  
  	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
  	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
! 		struct socket *last;
  		/*
  		 * Deliver a multicast or broadcast datagram to *all* sockets
  		 * for which the local and remote addresses and ports match
--- 181,187 ----
  
  	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
  	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
! 		struct inpcb *last;
  		/*
  		 * Deliver a multicast or broadcast datagram to *all* sockets
  		 * for which the local and remote addresses and ports match
***************
*** 231,246 ****
  				struct mbuf *n;
  
  				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
! 					if (sbappendaddr(&last->so_rcv,
  						(struct sockaddr *)&udp_in,
! 						n, (struct mbuf *)0) == 0) {
  						m_freem(n);
  						udpstat.udps_fullsock++;
  					} else
! 						sorwakeup(last);
  				}
  			}
! 			last = inp->inp_socket;
  			/*
  			 * Don't look for additional matches if this one does
  			 * not have either the SO_REUSEPORT or SO_REUSEADDR
--- 229,250 ----
  				struct mbuf *n;
  
  				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
! 					if (last->inp_flags & INP_CONTROLOPTS
! 					    || last->inp_socket->so_options & SO_TIMESTAMP)
! 						ip_savecontrol(last, &opts, ip, n);
! 					if (sbappendaddr(&last->inp_socket->so_rcv,
  						(struct sockaddr *)&udp_in,
! 						n, opts) == 0) {
  						m_freem(n);
+ 						if (opts)
+ 						    m_freem(opts);
  						udpstat.udps_fullsock++;
  					} else
! 						sorwakeup(last->inp_socket);
! 					opts = 0;
  				}
  			}
! 			last = inp;
  			/*
  			 * Don't look for additional matches if this one does
  			 * not have either the SO_REUSEPORT or SO_REUSEADDR
***************
*** 249,255 ****
  			 * port.  It * assumes that an application will never
  			 * clear these options after setting them.
  			 */
! 			if ((last->so_options&(SO_REUSEPORT|SO_REUSEADDR) == 0))
  				break;
  		}
  
--- 253,259 ----
  			 * port.  It * assumes that an application will never
  			 * clear these options after setting them.
  			 */
! 			if ((last->inp_socket->so_options&(SO_REUSEPORT|SO_REUSEADDR) == 0))
  				break;
  		}
  
***************
*** 262,273 ****
  			udpstat.udps_noportbcast++;
  			goto bad;
  		}
! 		if (sbappendaddr(&last->so_rcv, (struct sockaddr *)&udp_in,
! 		     m, (struct mbuf *)0) == 0) {
  			udpstat.udps_fullsock++;
  			goto bad;
  		}
! 		sorwakeup(last);
  		return;
  	}
  #if 0		/* re-enable this if your application actually could use it */
--- 266,281 ----
  			udpstat.udps_noportbcast++;
  			goto bad;
  		}
! 		if (last->inp_flags & INP_CONTROLOPTS
! 		    || last->inp_socket->so_options & SO_TIMESTAMP)
! 			ip_savecontrol(last, &opts, ip, m);
! 		if (sbappendaddr(&last->inp_socket->so_rcv,
! 		     (struct sockaddr *)&udp_in,
! 		     m, opts) == 0) {
  			udpstat.udps_fullsock++;
  			goto bad;
  		}
! 		sorwakeup(last->inp_socket);
  		return;
  	}
  #if 0		/* re-enable this if your application actually could use it */
***************
*** 325,360 ****
  	udp_in.sin_port = uh->uh_sport;
  	udp_in.sin_addr = ip->ip_src;
  	if (inp->inp_flags & INP_CONTROLOPTS
! 	    || inp->inp_socket->so_options & SO_TIMESTAMP) {
! 		struct mbuf **mp = &opts;
! 
! 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
! 			if (*mp = udp_timestamp())
! 				mp = &(*mp)->m_next;
! 		}
! 		if (inp->inp_flags & INP_RECVDSTADDR) {
! 			*mp = udp_saveopt((caddr_t) &ip->ip_dst,
! 			    sizeof(struct in_addr), IP_RECVDSTADDR);
! 			if (*mp)
! 				mp = &(*mp)->m_next;
! 		}
! #ifdef notyet
! 		/* options were tossed above */
! 		if (inp->inp_flags & INP_RECVOPTS) {
! 			*mp = udp_saveopt((caddr_t) opts_deleted_above,
! 			    sizeof(struct in_addr), IP_RECVOPTS);
! 			if (*mp)
! 				mp = &(*mp)->m_next;
! 		}
! 		/* ip_srcroute doesn't do what we want here, need to fix */
! 		if (inp->inp_flags & INP_RECVRETOPTS) {
! 			*mp = udp_saveopt((caddr_t) ip_srcroute(),
! 			    sizeof(struct in_addr), IP_RECVRETOPTS);
! 			if (*mp)
! 				mp = &(*mp)->m_next;
! 		}
! #endif
! 	}
  	iphlen += sizeof(struct udphdr);
  	m->m_len -= iphlen;
  	m->m_pkthdr.len -= iphlen;
--- 333,340 ----
  	udp_in.sin_port = uh->uh_sport;
  	udp_in.sin_addr = ip->ip_src;
  	if (inp->inp_flags & INP_CONTROLOPTS
! 	    || inp->inp_socket->so_options & SO_TIMESTAMP)
! 		ip_savecontrol(inp, &opts, ip, m);
  	iphlen += sizeof(struct udphdr);
  	m->m_len -= iphlen;
  	m->m_pkthdr.len -= iphlen;
***************
*** 370,426 ****
  	m_freem(m);
  	if (opts)
  		m_freem(opts);
- }
- 
- /*
-  * Create a "control" mbuf containing the specified data
-  * with the specified type for presentation with a datagram.
-  */
- struct mbuf *
- udp_saveopt(p, size, type)
- 	caddr_t p;
- 	register int size;
- 	int type;
- {
- 	register struct cmsghdr *cp;
- 	struct mbuf *m;
- 
- 	if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
- 		return ((struct mbuf *) NULL);
- 	cp = (struct cmsghdr *) mtod(m, struct cmsghdr *);
- 	(void)memcpy(CMSG_DATA(cp), p, size);
- 	size += sizeof(*cp);
- 	m->m_len = size;
- 	cp->cmsg_len = size;
- 	cp->cmsg_level = IPPROTO_IP;
- 	cp->cmsg_type = type;
- 	return (m);
- }
- 
- /*
-  *  Create an mbuf with the SCM_TIMESTAMP socket option data (struct timeval)
-  *  inside.  This really isn't UDP specific; but there's not really a better
-  *  place for it yet..
-  */
- static struct mbuf *
- udp_timestamp()
- {
- 	register struct cmsghdr *cp;
- 	struct mbuf *m;
- 	struct timeval tv;
- 
- 	MGET(m, M_DONTWAIT, MT_CONTROL);
- 	if (m == 0)
- 		return (struct mbuf *) 0;
- 
- 	microtime(&tv);
- 	cp = (struct cmsghdr *) mtod(m, struct cmsghdr *);
- 	cp->cmsg_len = 
- 	    m->m_len = sizeof(*cp) + sizeof(struct timeval);
- 	cp->cmsg_level = SOL_SOCKET;
- 	cp->cmsg_type = SCM_TIMESTAMP;
- 	(void) memcpy(CMSG_DATA(cp), &tv, sizeof(struct timeval));
- 	return (m);
  }
  
  /*
--- 350,355 ----