Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/rmind-smpnet]: src/sys - Add some initial locking to the IPv4 PCB.
details: https://anonhg.NetBSD.org/src/rev/7aed2419406b
branches: rmind-smpnet
changeset: 787709:7aed2419406b
user: rmind <rmind%NetBSD.org@localhost>
date: Mon Sep 23 00:57:53 2013 +0000
description:
- Add some initial locking to the IPv4 PCB.
- Rename inpcb_lookup_*() routines to be more accurate and add comments.
- Add some comments about connection life-cycle WRT socket layer.
diffstat:
sys/dist/pf/net/pf.c | 8 +-
sys/kern/uipc_socket2.c | 95 ++++++++++---------
sys/netinet/in_pcb.c | 223 +++++++++++++++++++++++++++++-----------------
sys/netinet/in_pcb.h | 10 +-
sys/netinet/portalgo.c | 15 +--
sys/netinet/raw_ip.c | 5 +-
sys/netinet/tcp_input.c | 8 +-
sys/netinet/tcp_subr.c | 8 +-
sys/netinet/tcp_usrreq.c | 6 +-
sys/netinet/tcp_vtw.c | 6 +-
sys/netinet/udp_usrreq.c | 129 ++++++++++++--------------
sys/netinet/udp_var.h | 3 +-
sys/netinet6/in6_pcb.c | 6 +-
13 files changed, 283 insertions(+), 239 deletions(-)
diffs (truncated from 1282 to 300 lines):
diff -r ae6c5239e717 -r 7aed2419406b sys/dist/pf/net/pf.c
--- a/sys/dist/pf/net/pf.c Wed Aug 28 23:59:09 2013 +0000
+++ b/sys/dist/pf/net/pf.c Mon Sep 23 00:57:53 2013 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: pf.c,v 1.69.4.1 2013/07/17 03:16:31 rmind Exp $ */
+/* $NetBSD: pf.c,v 1.69.4.2 2013/09/23 00:57:53 rmind Exp $ */
/* $OpenBSD: pf.c,v 1.552.2.1 2007/11/27 16:37:57 henning Exp $ */
/*
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pf.c,v 1.69.4.1 2013/07/17 03:16:31 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pf.c,v 1.69.4.2 2013/09/23 00:57:53 rmind Exp $");
#include "pflog.h"
@@ -2799,11 +2799,11 @@
#ifdef __NetBSD__
#define in_pcbhashlookup(tbl, saddr, sport, daddr, dport) \
- inpcb_lookup_connect(tbl, saddr, sport, daddr, dport, NULL)
+ inpcb_lookup(tbl, saddr, sport, daddr, dport, NULL)
#define in6_pcbhashlookup(tbl, saddr, sport, daddr, dport) \
in6_pcblookup_connect(tbl, saddr, sport, daddr, dport, 0, NULL)
#define inpcb_lookup_listen(tbl, addr, port, zero) \
- inpcb_lookup_bind(tbl, addr, port)
+ inpcb_lookup_bound(tbl, addr, port)
#define in6_pcblookup_listen(tbl, addr, port, zero) \
in6_pcblookup_bind(tbl, addr, port, zero)
#endif
diff -r ae6c5239e717 -r 7aed2419406b sys/kern/uipc_socket2.c
--- a/sys/kern/uipc_socket2.c Wed Aug 28 23:59:09 2013 +0000
+++ b/sys/kern/uipc_socket2.c Mon Sep 23 00:57:53 2013 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: uipc_socket2.c,v 1.112.2.1 2013/08/28 15:21:48 rmind Exp $ */
+/* $NetBSD: uipc_socket2.c,v 1.112.2.2 2013/09/23 00:57:53 rmind Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -58,7 +58,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.112.2.1 2013/08/28 15:21:48 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.112.2.2 2013/09/23 00:57:53 rmind Exp $");
#include "opt_mbuftrace.h"
#include "opt_sb_max.h"
@@ -82,6 +82,37 @@
/*
* Primitive routines for operating on sockets and socket buffers.
*
+ * Connection life-cycle:
+ *
+ * Normal sequence from the active (originating) side:
+ *
+ * - soisconnecting() is called during processing of connect() call,
+ * - resulting in an eventual call to soisconnected() if/when the
+ * connection is established.
+ *
+ * When the connection is torn down during processing of disconnect():
+ *
+ * - soisdisconnecting() is called and,
+ * - soisdisconnected() is called when the connection to the peer
+ * is totally severed.
+ *
+ * The semantics of these routines are such that connectionless protocols
+ * can call soisconnected() and soisdisconnected() only, bypassing the
+ * in-progress calls when setting up a ``connection'' takes no time.
+ *
+ * From the passive side, a socket is created with two queues of sockets:
+ *
+ * - so_q0 (0) for partial connections (i.e. connections in progress)
+ * - so_q (1) for connections already made and awaiting user acceptance.
+ *
+ * As a protocol is preparing incoming connections, it creates a socket
+ * structure queued on so_q0 by calling sonewconn(). When the connection
+ * is established, soisconnected() is called, and transfers the
+ * socket structure to so_q, making it available to accept().
+ *
+ * If a socket is closed with sockets on either so_q0 or so_q, these
+ * sockets are dropped.
+ *
* Locking rules and assumptions:
*
* o socket::so_lock can change on the fly. The low level routines used
@@ -120,40 +151,9 @@
* domains.
*/
-static pool_cache_t socket_cache;
-
-u_long sb_max = SB_MAX; /* maximum socket buffer size */
-static u_long sb_max_adj; /* adjusted sb_max */
-
-/*
- * Procedures to manipulate state flags of socket
- * and do appropriate wakeups. Normal sequence from the
- * active (originating) side is that soisconnecting() is
- * called during processing of connect() call,
- * resulting in an eventual call to soisconnected() if/when the
- * connection is established. When the connection is torn down
- * soisdisconnecting() is called during processing of disconnect() call,
- * and soisdisconnected() is called when the connection to the peer
- * is totally severed. The semantics of these routines are such that
- * connectionless protocols can call soisconnected() and soisdisconnected()
- * only, bypassing the in-progress calls when setting up a ``connection''
- * takes no time.
- *
- * From the passive side, a socket is created with
- * two queues of sockets: so_q0 for connections in progress
- * and so_q for connections already made and awaiting user acceptance.
- * As a protocol is preparing incoming connections, it creates a socket
- * structure queued on so_q0 by calling sonewconn(). When the connection
- * is established, soisconnected() is called, and transfers the
- * socket structure to so_q, making it available to accept().
- *
- * If a socket is closed with sockets on either
- * so_q0 or so_q, these sockets are dropped.
- *
- * If higher level protocols are implemented in
- * the kernel, the wakeups done here will sometimes
- * cause software-interrupt process scheduling.
- */
+static pool_cache_t socket_cache;
+u_long sb_max = SB_MAX;/* maximum socket buffer size */
+static u_long sb_max_adj; /* adjusted sb_max */
void
soisconnecting(struct socket *so)
@@ -179,6 +179,10 @@
so->so_state |= SS_ISCONNECTED;
if (head && so->so_onq == &head->so_q0) {
if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+ /*
+ * Re-enqueue and wake up any waiters, e.g.
+ * processes blocking on accept().
+ */
soqremque(so, 0);
soqinsque(head, so, 1);
sorwakeup(head);
@@ -242,6 +246,7 @@
* properly linked into the data structure of the original socket.
*
* => Connection status may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
+ * => May be called from soft-interrupt context.
* => Listening socket should be locked.
* => Returns the new socket locked.
*/
@@ -269,7 +274,7 @@
return NULL;
}
so->so_type = head->so_type;
- so->so_options = head->so_options &~ SO_ACCEPTCONN;
+ so->so_options = head->so_options & ~SO_ACCEPTCONN;
so->so_linger = head->so_linger;
so->so_state = head->so_state | SS_NOFDREF;
so->so_proto = head->so_proto;
@@ -430,29 +435,29 @@
}
/*
- * Socantsendmore indicates that no more data will be sent on the
+ * socantsendmore(): indicates that no more data will be sent on the
* socket; it would normally be applied to a socket when the user
* informs the system that no more data is to be sent, by the protocol
- * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
- * will be received, and will normally be applied to the socket by a
- * protocol when it detects that the peer will send no more data.
- * Data queued for reading in the socket may yet be read.
+ * code (in case PRU_SHUTDOWN).
*/
-
void
socantsendmore(struct socket *so)
{
-
KASSERT(solocked(so));
so->so_state |= SS_CANTSENDMORE;
sowwakeup(so);
}
+/*
+ * socantrcvmore(): indicates that no more data will be received and
+ * will normally be applied to the socket by a protocol when it detects
+ * that the peer will send no more data. Data queued for reading in
+ * the socket may yet be read.
+ */
void
socantrcvmore(struct socket *so)
{
-
KASSERT(solocked(so));
so->so_state |= SS_CANTRCVMORE;
diff -r ae6c5239e717 -r 7aed2419406b sys/netinet/in_pcb.c
--- a/sys/netinet/in_pcb.c Wed Aug 28 23:59:09 2013 +0000
+++ b/sys/netinet/in_pcb.c Mon Sep 23 00:57:53 2013 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: in_pcb.c,v 1.145.2.2 2013/08/28 15:21:48 rmind Exp $ */
+/* $NetBSD: in_pcb.c,v 1.145.2.3 2013/09/23 00:57:53 rmind Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -96,32 +96,33 @@
* Internet Protocol Control Block (PCB) module.
*
* Each PCB (inpcb_t) is associated with a socket during PCB creation.
- * Its members are protected by the socket lock. Creation is done on
- * PRU_ATTACH protocol command and destruction on PRU_DETACH.
+ * Its members are protected by the socket lock. Creation is done at
+ * pr_attach protocol method and destruction at pr_detach.
*
* Synchronisation
*
* PCBs are inserted into a PCB table (inpcbtable_t). The hash and
* the lists of the table are protected by the inpcbtable_t::inpt_lock.
* There are two main PCB lookup points, which can occur either from
- * the top or the bottom of the stack:
+ * the top or the bottom of the network stack:
*
* - Process performs a protocol operation (e.g. PRU_SEND) and gets
* PCB from the socket, i.e. sotoinpcb(9).
* - When a packet arrives (e.g. UDP datagram), the protocol layer
- * performs 4-tuple a PCB lookup to find an associated socket.
+ * performs a PCB lookup to find an associated socket.
*
* In addition to this, there are cases when multiple PCBs are matched
* and processed (e.g. raw IP or UDP multicast).
*
- * Lock order, XXXrmind: NOT YET
+ * Lock order
*
- * inpcbtable_t::inpt_lock ->
- * struct socket::so_lock
+ * softnet_lock ->
+ * struct socket::so_lock ->
+ * inpcbtable_t::inpt_lock ->
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in_pcb.c,v 1.145.2.2 2013/08/28 15:21:48 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in_pcb.c,v 1.145.2.3 2013/09/23 00:57:53 rmind Exp $");
#include "opt_inet.h"
#include "opt_ipsec.h"
@@ -175,6 +176,8 @@
int lowportmin = IPPORT_RESERVEDMIN;
int lowportmax = IPPORT_RESERVEDMAX;
+static void inpcb_set_state1(inpcb_t *, int);
+
#define INPCBHASH_PORT(table, lport) \
&(table)->inpt_porthashtbl[ntohs(lport) & (table)->inpt_porthash]
#define INPCBHASH_BIND(table, laddr, lport) \
@@ -231,7 +234,7 @@
inp = pool_cache_get(inpcb_cache, PR_NOWAIT);
if (inp == NULL) {
- return ENOBUFS;
+ return ENOMEM;
}
memset(inp, 0, sizeof(*inp));
@@ -249,11 +252,13 @@
}
#endif
so->so_pcb = inp;
- inpcb_set_state(inp, INP_ATTACHED);
+ head = INPCBHASH_PORT(inpt, inp->inp_lport);
- head = INPCBHASH_PORT(inpt, inp->inp_lport);
+ mutex_enter(&inpt->inpt_lock);
+ inpcb_set_state1(inp, INP_ATTACHED);
CIRCLEQ_INSERT_HEAD(&inpt->inpt_queue, &inp->inp_head, inph_queue);
LIST_INSERT_HEAD(head, &inp->inp_head, inph_lhash);
+ mutex_exit(&inpt->inpt_lock);
return 0;
}
@@ -393,7 +398,8 @@
/* XXX-kauth */
if (so->so_uidinfo->ui_uid && !IN_MULTICAST(sin->sin_addr.s_addr)) {
- t = inpcb_lookup_port(inpt, sin->sin_addr, sin->sin_port, 1, &vestige);
+ t = inpcb_lookup_local(inpt, sin->sin_addr,
+ sin->sin_port, 1, &vestige);
/*
* XXX: investigate ramifications of loosening this
* restriction so that as long as both ports have
@@ -415,7 +421,8 @@
}
}
}
- t = inpcb_lookup_port(inpt, sin->sin_addr, sin->sin_port, wild, &vestige);
+ t = inpcb_lookup_local(inpt, sin->sin_addr, sin->sin_port,
+ wild, &vestige);
Home | Main Index | Thread Index | Old Index