Subject: can't send TCP/IP packets >= 32K
To: None <tech-kern@NetBSD.ORG, mycroft@NetBSD.ORG>
From: Kevin M. Lahey <kml@nas.nasa.gov>
List: tech-kern
Date: 08/04/1996 18:25:12
I recently added some simple code to allow me to change the MTU on
the loopback interface, and I noticed some problems with the BSD 
networking code.  No IP packets of 32K or larger could be received, 
even when fragmented.  

Strangely enough, the ip_len and ip_off fields in <netinet/ip.h> are
*signed* shorts (int16_t).  Under -current (as of July 27), changing these 
(and UDP lengths, ipovly lengths, IP offsets, etc.) to 
unsigned shorts (u_int16_t) seemed to fix the problem.

Check out the following comment in ip.h:

/*
 * We declare ip_len and ip_off to be short, rather than u_short
 * pragmatically since otherwise unsigned comparisons can result
 * against negative integers quite easily, and fail in subtle ways.
 */

Is there still a good reason to keep these as signed shorts
(int16_t)?  Surely all of the rest of the variables with which 
these will be compared are now regular ints?  Am I missing anything?
The other way to fix this is to add a bazillion casts throughout
the TCP/IP stack, but this is painful and ugly.

BTW:  Of the OSes which successfully send and receive packets larger
than 32K, IRIX and AIX define these values to be unsigned, and
Solaris leaves 'em signed.

Thanks,
Kevin

kml@nas.nasa.gov

Here are my fixes for this problem.  These assume that unsigned 
values are okay for ip_len, etc.  Also included:

* a tweak to the UDP stack that will disallow the sending of more 
UDP data than would fit in a max size IP packet (otherwise the stack
lets a datagram of any size through, and the IP length field just wraps [!]).

* a tweak to ping to allow it to request large enough send and receive
buffers to send packets up to the largest allowable.  This is useful
if you want to test the changes.  :-)

Index: /usr/src/sys/netinet/ip.h
===================================================================
RCS file: /u/wk/cvsroot/netbsd/src/sys/netinet/ip.h,v
retrieving revision 1.1.1.1
diff -c -r1.1.1.1 ip.h
*** ip.h	1996/08/02 02:50:17	1.1.1.1
--- ip.h	1996/08/05 01:00:12
***************
*** 43,52 ****
  
  /*
   * Structure of an internet header, naked of options.
-  *
-  * We declare ip_len and ip_off to be short, rather than u_short
-  * pragmatically since otherwise unsigned comparisons can result
-  * against negative integers quite easily, and fail in subtle ways.
   */
  struct ip {
  #if BYTE_ORDER == LITTLE_ENDIAN
--- 43,48 ----
***************
*** 58,66 ****
  		  ip_hl:4;		/* header length */
  #endif
  	u_int8_t  ip_tos;		/* type of service */
! 	int16_t	  ip_len;		/* total length */
  	u_int16_t ip_id;		/* identification */
! 	int16_t	  ip_off;		/* fragment offset field */
  #define	IP_DF 0x4000			/* dont fragment flag */
  #define	IP_MF 0x2000			/* more fragments flag */
  #define	IP_OFFMASK 0x1fff		/* mask for fragmenting bits */
--- 54,62 ----
  		  ip_hl:4;		/* header length */
  #endif
  	u_int8_t  ip_tos;		/* type of service */
! 	u_int16_t ip_len;		/* total length */
  	u_int16_t ip_id;		/* identification */
! 	u_int16_t ip_off;		/* fragment offset field */
  #define	IP_DF 0x4000			/* dont fragment flag */
  #define	IP_MF 0x2000			/* more fragments flag */
  #define	IP_OFFMASK 0x1fff		/* mask for fragmenting bits */
Index: /usr/src/sys/netinet/ip_var.h
===================================================================
RCS file: /u/wk/cvsroot/netbsd/src/sys/netinet/ip_var.h,v
retrieving revision 1.1.1.1
diff -c -r1.1.1.1 ip_var.h
*** ip_var.h	1996/08/02 02:50:17	1.1.1.1
--- ip_var.h	1996/08/03 23:18:12
***************
*** 41,51 ****
   * Overlay for ip header used by other protocols (tcp, udp).
   */
  struct ipovly {
! 	u_int8_t ih_x1[9];		/* (unused) */
! 	u_int8_t ih_pr;			/* protocol */
! 	int16_t	 ih_len;		/* protocol length */
! 	struct	 in_addr ih_src;	/* source internet address */
! 	struct	 in_addr ih_dst;	/* destination internet address */
  };
  
  /*
--- 41,51 ----
   * Overlay for ip header used by other protocols (tcp, udp).
   */
  struct ipovly {
! 	u_int8_t  ih_x1[9];		/* (unused) */
! 	u_int8_t  ih_pr;		/* protocol */
! 	u_int16_t ih_len;		/* protocol length */
! 	struct	  in_addr ih_src;	/* source internet address */
! 	struct	  in_addr ih_dst;	/* destination internet address */
  };
  
  /*
Index: /usr/src/sys/netinet/udp.h
===================================================================
RCS file: /u/wk/cvsroot/netbsd/src/sys/netinet/udp.h,v
retrieving revision 1.1.1.1
diff -c -r1.1.1.1 udp.h
*** udp.h	1996/08/02 02:50:17	1.1.1.1
--- udp.h	1996/08/03 23:15:38
***************
*** 42,47 ****
  struct udphdr {
  	u_int16_t uh_sport;		/* source port */
  	u_int16_t uh_dport;		/* destination port */
! 	int16_t	  uh_ulen;		/* udp length */
  	u_int16_t uh_sum;		/* udp checksum */
  };
--- 42,47 ----
  struct udphdr {
  	u_int16_t uh_sport;		/* source port */
  	u_int16_t uh_dport;		/* destination port */
! 	u_int16_t uh_ulen;		/* udp length */
  	u_int16_t uh_sum;		/* udp checksum */
  };
Index: /usr/src/sys/netinet/udp_usrreq.c
===================================================================
RCS file: /u/wk/cvsroot/netbsd/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.1.1.1
diff -c -r1.1.1.1 udp_usrreq.c
*** udp_usrreq.c	1996/08/02 02:50:17	1.1.1.1
--- udp_usrreq.c	1996/08/04 00:09:29
***************
*** 430,435 ****
--- 430,440 ----
  	if (control)
  		m_freem(control);		/* XXX */
  
+ 	if (sizeof (struct udpiphdr) + len > IP_MAXPACKET) {
+ 		error = EMSGSIZE;
+ 		goto release;
+ 	}
+ 
  	if (addr) {
  		laddr = inp->inp_laddr;
  		if (inp->inp_faddr.s_addr != INADDR_ANY) {

Index: /usr/src/sbin/ping/ping.c
===================================================================
RCS file: /u/wk/cvsroot/netbsd/src/sbin/ping/ping.c,v
retrieving revision 1.1.1.1
diff -c -r1.1.1.1 ping.c
*** ping.c	1996/08/02 02:47:00	1.1.1.1
--- ping.c	1996/08/03 23:57:00
***************
*** 178,184 ****
  	struct protoent *proto;
  	struct in_addr ifaddr, saddr;
  	register int i;
! 	int ch, fdmask, hold, packlen, preload;
  	u_char *datap, *packet;
  	char *target, hnamebuf[MAXHOSTNAMELEN];
  	u_char ttl, loop = 1;
--- 178,184 ----
  	struct protoent *proto;
  	struct in_addr ifaddr, saddr;
  	register int i;
! 	int ch, fdmask, hold, packlen, preload, maxsize, maxsizelen;
  	u_char *datap, *packet;
  	char *target, hnamebuf[MAXHOSTNAMELEN];
  	u_char ttl, loop = 1;
***************
*** 364,376 ****
--- 364,396 ----
  		       sizeof(ifaddr)) < 0)
  		err(1, "setsockopt IP_MULTICAST_IF");
  
+ 	/* 
+ 	 * When trying to send large packets, you must increase the
+ 	 * size of both the send and receive buffers...
+ 	 */
+ 
+ 	maxsizelen = sizeof maxsize;
+ 	if (getsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *) &maxsize, 
+ 		       &maxsizelen) < 0)
+ 		err(1, "getsockopt");
+ 
+ 	if (maxsize < packlen) {
+ 		if(setsockopt(s, SOL_SOCKET, SO_SNDBUF, (char *) &packlen, 
+ 		              sizeof(maxsize)) < 0)
+ 			err(1, "setsockopt");
+ 	}
+ 
  	/*
  	 * When pinging the broadcast address, you can get a lot of answers.
  	 * Doing something so evil is useful if you are trying to stress the
  	 * ethernet, or just want to fill the arp cache to get some stuff for
  	 * /etc/ethers.
  	 */
+ 
  	hold = 48 * 1024;
+ 	if (hold < packlen)
+ 		hold = packlen;
+ 
  	(void)setsockopt(s, SOL_SOCKET, SO_RCVBUF, (char *)&hold,
  	    sizeof(hold));