Subject: PR/26773(ipf) and PR/26433(pf)
To: None <tech-net@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-net
Date: 08/31/2004 07:28:54
--NextPart-20040831071553-0067900
Content-Type: Text/Plain; charset=us-ascii

hi,

attached diffs are workarounds for PR/26773 and PR/26433.

a.diff: add m_copyback_cow() and m_makewritable().
b.diff: a short-term fix for pf.
c.diff: a short-term fix for ipf.
d.diff: redo raw_ip6.c 1.66-1.67 using m_copyback_cow().  (it isn't directly
	related to the PRs.  just an example usage of m_copyback_cow.)

comments?

YAMAMOTO Takashi

--NextPart-20040831071553-0067900
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"

Index: sys/mbuf.h
===================================================================
--- sys/mbuf.h	(revision 843)
+++ sys/mbuf.h	(working copy)
@@ -852,6 +852,8 @@ void	m_claimm(struct mbuf *, struct mown
 void	m_clget(struct mbuf *, int);
 int	m_mballoc(int, int);
 void	m_copyback(struct mbuf *, int, int, caddr_t);
+struct	mbuf *m_copyback_cow(struct mbuf *, int, int, caddr_t, int);
+struct	mbuf *m_makewritable(struct mbuf *, int, int, int);
 void	m_copydata(struct mbuf *, int, int, caddr_t);
 void	m_freem(struct mbuf *);
 void	m_reclaim(void *, int);
Index: kern/uipc_mbuf.c
===================================================================
--- kern/uipc_mbuf.c	(revision 843)
+++ kern/uipc_mbuf.c	(working copy)
@@ -114,6 +114,14 @@ struct pool_allocator mclpool_allocator 
 };
 
 static struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
+static struct mbuf *m_split0(struct mbuf *, int, int, int);
+static struct mbuf *m_copyback0(struct mbuf *, int, int, caddr_t, int, int);
+
+/* flags for m_copyback0 */
+#define	M_COPYBACK0_COPYBACK	0x0001	/* copyback from cp */
+#define	M_COPYBACK0_PRESERVE	0x0002	/* preserve original data */
+#define	M_COPYBACK0_COW		0x0004	/* do copy-on-write */
+#define	M_COPYBACK0_EXTEND	0x0008	/* extend chain */
 
 const char mclpool_warnmsg[] =
     "WARNING: mclpool limit reached; increase NMBCLUSTERS";
@@ -912,6 +920,13 @@ m_copyup(struct mbuf *n, int len, int ds
 struct mbuf *
 m_split(struct mbuf *m0, int len0, int wait)
 {
+
+	return m_split0(m0, len0, wait, 0);
+}
+
+static struct mbuf *
+m_split0(struct mbuf *m0, int len0, int wait, int copyhdr)
+{
 	struct mbuf *m, *n;
 	unsigned len = len0, remain, len_save;
 
@@ -920,7 +935,7 @@ m_split(struct mbuf *m0, int len0, int w
 	if (m == 0)
 		return (NULL);
 	remain = m->m_len - len;
-	if (m0->m_flags & M_PKTHDR) {
+	if (copyhdr && (m0->m_flags & M_PKTHDR)) {
 		MGETHDR(n, wait, m0->m_type);
 		if (n == 0)
 			return (NULL);
@@ -1049,29 +1064,178 @@ m_devget(char *buf, int totlen, int off0
 void
 m_copyback(struct mbuf *m0, int off, int len, caddr_t cp)
 {
+#if defined(DEBUG)
+	struct mbuf *m;
+#endif /* defined(DEBUG) */
+
+	if (m0 == NULL)
+		return;
+
+#if defined(DEBUG)
+	m =
+#endif /* defined(DEBUG) */
+	m_copyback0(m0, off, len, cp,
+	    M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT);
+#if defined(DEBUG)
+	if (m != m0)
+		panic("m_copyback");
+#endif /* defined(DEBUG) */
+}
+
+struct mbuf *
+m_copyback_cow(struct mbuf *m0, int off, int len, caddr_t cp, int how)
+{
+
+	/* don't support chain expansion */
+	KDASSERT(off + len <= m_length(m0));
+
+	return m_copyback0(m0, off, len, cp,
+	    M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how);
+}
+
+/*
+ * m_makewritable: ensure the specified range writable.
+ */
+struct mbuf *
+m_makewritable(struct mbuf *m0, int off, int len, int how)
+{
+
+	if (len == M_COPYALL)
+		len = m_length(m0) - off; /* XXX */
+
+	return m_copyback0(m0, off, len, NULL,
+	    M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how);
+}
+
+static struct mbuf *
+m_copyback0(struct mbuf *m0, int off, int len, caddr_t cp, int flags, int how)
+{
 	int mlen;
 	struct mbuf *m = m0, *n;
+	struct mbuf **mpp;
 	int totlen = 0;
 
-	if (m0 == 0)
-		return;
+	KASSERT(m0 != NULL);
+	KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL);
+	KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL);
+
+	mpp = &m0;
 	while (off > (mlen = m->m_len)) {
 		off -= mlen;
 		totlen += mlen;
 		if (m->m_next == 0) {
-			n = m_getclr(M_DONTWAIT, m->m_type);
+			if ((flags & M_COPYBACK0_EXTEND) == 0)
+				goto out;
+			n = m_getclr(how, m->m_type);
 			if (n == 0)
 				goto out;
 			n->m_len = min(MLEN, len + off);
 			m->m_next = n;
 		}
+		mpp = &m->m_next;
 		m = m->m_next;
 	}
 	while (len > 0) {
-		mlen = min (m->m_len - off, len);
-		KASSERT(mlen == 0 || !M_READONLY(m));
-		memcpy(mtod(m, caddr_t) + off, cp, (unsigned)mlen);
-		cp += mlen;
+		mlen = m->m_len - off;
+		if (mlen != 0 && M_READONLY(m)) {
+			char *datap;
+			int eatlen;
+
+			/*
+			 * this mbuf is read-only.
+			 * allocate a new writable mbuf and try again.
+			 */
+
+#if defined(DIAGNOSTIC)
+			if ((flags & M_COPYBACK0_COW) == 0)
+				panic("m_copyback0: read-only");
+#endif /* defined(DIAGNOSTIC) */
+
+			/*
+			 * if we're going to write into the middle of
+			 * a mbuf, split it first.
+			 */
+			if (off > 0 && len < mlen) {
+				n = m_split0(m, off, how, 0);
+				if (n == NULL)
+					goto fail;
+				m->m_next = n;
+				mpp = &m->m_next;
+				m = n;
+				off = 0;
+				continue;
+			}
+
+			/*
+			 * XXX TODO coalesce into the trailingspace of
+			 * the previous mbuf when possible.
+			 */
+
+			/*
+			 * allocate a new mbuf.  copy packet header if needed.
+			 */
+			MGET(n, how, m->m_type);
+			if (n == NULL)
+				goto fail;
+			MCLAIM(n, m->m_owner);
+			if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
+				/* XXX M_MOVE_PKTHDR */
+				M_COPY_PKTHDR(n, m);
+				n->m_len = MHLEN;
+			} else {
+				if (len >= MINCLSIZE)
+					MCLGET(n, M_DONTWAIT);
+				n->m_len =
+				    (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
+			}
+			if (n->m_len > len)
+				n->m_len = len;
+
+			/*
+			 * free the region which has been overwritten.
+			 * copying data from old mbufs if requested.
+			 */
+			if (flags & M_COPYBACK0_PRESERVE)
+				datap = mtod(n, char *);
+			else
+				datap = NULL;
+			eatlen = n->m_len;
+			if (off > 0) {
+				KDASSERT(len >= mlen);
+				m->m_len = off;
+				m->m_next = n;
+				if (datap) {
+					m_copydata(m, off, mlen, datap);
+					datap += mlen;
+				}
+				eatlen -= mlen;
+				mpp = &m->m_next;
+				m = m->m_next;
+			}
+			while (m != NULL && M_READONLY(m) &&
+			    n->m_type == m->m_type && eatlen > 0) {
+				mlen = min(eatlen, m->m_len);
+				if (datap) {
+					m_copydata(m, 0, mlen, datap);
+					datap += mlen;
+				}
+				m->m_data += mlen;
+				m->m_len -= mlen;
+				eatlen -= mlen;
+				if (m->m_len == 0)
+					*mpp = m = m_free(m);
+			}
+			if (eatlen > 0)
+				n->m_len -= eatlen;
+			n->m_next = m;
+			*mpp = m = n;
+			continue;
+		}
+		mlen = min(mlen, len);
+		if (flags & M_COPYBACK0_COPYBACK) {
+			memcpy(mtod(m, caddr_t) + off, cp, (unsigned)mlen);
+			cp += mlen;
+		}
 		len -= mlen;
 		mlen += off;
 		off = 0;
@@ -1079,16 +1243,25 @@ m_copyback(struct mbuf *m0, int off, int
 		if (len == 0)
 			break;
 		if (m->m_next == 0) {
-			n = m_get(M_DONTWAIT, m->m_type);
+			if ((flags & M_COPYBACK0_EXTEND) == 0)
+				goto out;
+			n = m_get(how, m->m_type);
 			if (n == 0)
 				break;
 			n->m_len = min(MLEN, len);
 			m->m_next = n;
 		}
+		mpp = &m->m_next;
 		m = m->m_next;
 	}
 out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
 		m->m_pkthdr.len = totlen;
+
+	return m0;
+
+fail:
+	m_freem(m0);
+	return NULL;
 }
 
 /*

--NextPart-20040831071553-0067900
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="b.diff"

Index: dist/pf/net/pf_ioctl.c
===================================================================
--- dist/pf/net/pf_ioctl.c	(revision 830)
+++ dist/pf/net/pf_ioctl.c	(working copy)
@@ -2763,6 +2763,16 @@ pfil4_wrapper(void *arg, struct mbuf **m
 {
 
 	/*
+	 * ensure that mbufs are writable beforehand
+	 * as it's assumed by pf code.
+	 * ip hdr (60 bytes) + tcp hdr (60 bytes) should be enough.
+	 * XXX inefficient
+	 */
+	*mp = m_makewritable(*mp, 0, 60 + 60, M_DONTWAIT);
+	if (*mp == NULL)
+		return ENOBUFS;
+
+	/*
 	 * If the packet is out-bound, we can't delay checksums
 	 * here.  For in-bound, the checksum has already been
 	 * validated.
@@ -2788,6 +2798,15 @@ int
 pfil6_wrapper(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
 {
 
+	/*
+	 * ensure that mbufs are writable beforehand
+	 * as it's assumed by pf code.
+	 * XXX inefficient
+	 */
+	*mp = m_makewritable(*mp, 0, M_COPYALL, M_DONTWAIT);
+	if (*mp == NULL)
+		return ENOBUFS;
+
 	if (pf_test6(dir == PFIL_OUT ? PF_OUT : PF_IN, ifp, mp) != PF_PASS) {
 		m_freem(*mp);
 		*mp = NULL;

--NextPart-20040831071553-0067900
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="c.diff"

Index: netinet/ip_fil_netbsd.c
===================================================================
--- netinet/ip_fil_netbsd.c	(revision 837)
+++ netinet/ip_fil_netbsd.c	(working copy)
@@ -125,8 +125,18 @@ struct mbuf **mp;
 struct ifnet *ifp;
 int dir;
 {
-	struct ip *ip = mtod(*mp, struct ip *);
-	int rv, hlen = ip->ip_hl << 2;
+	struct ip *ip;
+	int rv, hlen;
+
+	/*
+	 * ensure that mbufs are writable beforehand
+	 * as it's assumed by ipf code.
+	 * ip hdr (60 bytes) + tcp hdr (60 bytes) should be enough.
+	 * XXX inefficient
+	 */
+	*mp = m_makewritable(*mp, 0, 60 + 60, M_DONTWAIT);
+	if (*mp == NULL)
+		return ENOBUFS;
 
 #if defined(M_CSUM_TCPv4)
 	/*
@@ -143,6 +153,9 @@ int dir;
 	}
 #endif /* M_CSUM_TCPv4 */
 
+	ip = mtod(*mp, struct ip *);
+	hlen = ip->ip_hl << 2;
+
 	/*
 	 * We get the packet with all fields in network byte
 	 * order.  We expect ip_len and ip_off to be in host
@@ -178,6 +191,15 @@ struct ifnet *ifp;
 int dir;
 {
 	
+	/*
+	 * ensure that mbufs are writable beforehand
+	 * as it's assumed by ipf code.
+	 * XXX inefficient
+	 */
+	*mp = m_makewritable(*mp, 0, M_COPYALL, M_DONTWAIT);
+	if (*mp == NULL)
+		return ENOBUFS;
+
 	return (fr_check(mtod(*mp, struct ip *), sizeof(struct ip6_hdr),
 	    ifp, (dir == PFIL_OUT), mp));
 }

--NextPart-20040831071553-0067900
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="d.diff"

Index: netinet6/raw_ip6.c
===================================================================
--- netinet6/raw_ip6.c	(revision 830)
+++ netinet6/raw_ip6.c	(working copy)
@@ -479,11 +479,8 @@ rip6_output(m, va_alist)
 
 	if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 ||
 	    in6p->in6p_cksum != -1) {
-		struct mbuf *n;
 		int off;
-		u_int16_t *sump;
 		u_int16_t sum;
-		int sumoff;
 
 		/* compute checksum */
 		if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
@@ -496,16 +493,20 @@ rip6_output(m, va_alist)
 		}
 		off += sizeof(struct ip6_hdr);
 
-		n = m_pulldown(m, off, sizeof(*sump), &sumoff);
-		if (n == NULL) {
-			m = NULL;
+		sum = 0;
+		m = m_copyback_cow(m, off, sizeof(sum), (caddr_t)&sum,
+		    M_DONTWAIT);
+		if (m == NULL) {
 			error = ENOBUFS;
 			goto bad;
 		}
-		sump = (u_int16_t *)(mtod(n, caddr_t) + sumoff);
-		memset(sump, 0, sizeof(*sump));
 		sum = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen);
-		memcpy(sump, &sum, sizeof(*sump));
+		m = m_copyback_cow(m, off, sizeof(sum), (caddr_t)&sum,
+		    M_DONTWAIT);
+		if (m == NULL) {
+			error = ENOBUFS;
+			goto bad;
+		}
 	}
 
 	flags = 0;

--NextPart-20040831071553-0067900--