Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/rmind-smpnet]: src/sys - Add some initial locking to the IPv4 PCB.



details:   https://anonhg.NetBSD.org/src/rev/7aed2419406b
branches:  rmind-smpnet
changeset: 787709:7aed2419406b
user:      rmind <rmind%NetBSD.org@localhost>
date:      Mon Sep 23 00:57:53 2013 +0000

description:
- Add some initial locking to the IPv4 PCB.
- Rename inpcb_lookup_*() routines to be more accurate and add comments.
- Add some comments about the connection life-cycle with respect to the socket layer.

diffstat:

 sys/dist/pf/net/pf.c     |    8 +-
 sys/kern/uipc_socket2.c  |   95 ++++++++++---------
 sys/netinet/in_pcb.c     |  223 +++++++++++++++++++++++++++++-----------------
 sys/netinet/in_pcb.h     |   10 +-
 sys/netinet/portalgo.c   |   15 +--
 sys/netinet/raw_ip.c     |    5 +-
 sys/netinet/tcp_input.c  |    8 +-
 sys/netinet/tcp_subr.c   |    8 +-
 sys/netinet/tcp_usrreq.c |    6 +-
 sys/netinet/tcp_vtw.c    |    6 +-
 sys/netinet/udp_usrreq.c |  129 ++++++++++++--------------
 sys/netinet/udp_var.h    |    3 +-
 sys/netinet6/in6_pcb.c   |    6 +-
 13 files changed, 283 insertions(+), 239 deletions(-)

diffs (truncated from 1282 to 300 lines):

diff -r ae6c5239e717 -r 7aed2419406b sys/dist/pf/net/pf.c
--- a/sys/dist/pf/net/pf.c      Wed Aug 28 23:59:09 2013 +0000
+++ b/sys/dist/pf/net/pf.c      Mon Sep 23 00:57:53 2013 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: pf.c,v 1.69.4.1 2013/07/17 03:16:31 rmind Exp $        */
+/*     $NetBSD: pf.c,v 1.69.4.2 2013/09/23 00:57:53 rmind Exp $        */
 /*     $OpenBSD: pf.c,v 1.552.2.1 2007/11/27 16:37:57 henning Exp $ */
 
 /*
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pf.c,v 1.69.4.1 2013/07/17 03:16:31 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pf.c,v 1.69.4.2 2013/09/23 00:57:53 rmind Exp $");
 
 #include "pflog.h"
 
@@ -2799,11 +2799,11 @@
 
 #ifdef __NetBSD__
 #define in_pcbhashlookup(tbl, saddr, sport, daddr, dport) \
-    inpcb_lookup_connect(tbl, saddr, sport, daddr, dport, NULL)
+    inpcb_lookup(tbl, saddr, sport, daddr, dport, NULL)
 #define in6_pcbhashlookup(tbl, saddr, sport, daddr, dport) \
     in6_pcblookup_connect(tbl, saddr, sport, daddr, dport, 0, NULL)
 #define inpcb_lookup_listen(tbl, addr, port, zero) \
-    inpcb_lookup_bind(tbl, addr, port)
+    inpcb_lookup_bound(tbl, addr, port)
 #define in6_pcblookup_listen(tbl, addr, port, zero) \
     in6_pcblookup_bind(tbl, addr, port, zero)
 #endif
diff -r ae6c5239e717 -r 7aed2419406b sys/kern/uipc_socket2.c
--- a/sys/kern/uipc_socket2.c   Wed Aug 28 23:59:09 2013 +0000
+++ b/sys/kern/uipc_socket2.c   Mon Sep 23 00:57:53 2013 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: uipc_socket2.c,v 1.112.2.1 2013/08/28 15:21:48 rmind Exp $     */
+/*     $NetBSD: uipc_socket2.c,v 1.112.2.2 2013/09/23 00:57:53 rmind Exp $     */
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -58,7 +58,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.112.2.1 2013/08/28 15:21:48 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.112.2.2 2013/09/23 00:57:53 rmind Exp $");
 
 #include "opt_mbuftrace.h"
 #include "opt_sb_max.h"
@@ -82,6 +82,37 @@
 /*
  * Primitive routines for operating on sockets and socket buffers.
  *
+ * Connection life-cycle:
+ *
+ *     Normal sequence from the active (originating) side:
+ *
+ *     - soisconnecting() is called during processing of connect() call,
+ *     - resulting in an eventual call to soisconnected() if/when the
+ *       connection is established.
+ *
+ *     When the connection is torn down during processing of disconnect():
+ *
+ *     - soisdisconnecting() is called and,
+ *     - soisdisconnected() is called when the connection to the peer
+ *       is totally severed.
+ *
+ *     The semantics of these routines are such that connectionless protocols
+ *     can call soisconnected() and soisdisconnected() only, bypassing the
+ *     in-progress calls when setting up a ``connection'' takes no time.
+ *
+ *     From the passive side, a socket is created with two queues of sockets:
+ *
+ *     - so_q0 (0) for partial connections (i.e. connections in progress)
+ *     - so_q (1) for connections already made and awaiting user acceptance.
+ *
+ *     As a protocol is preparing incoming connections, it creates a socket
+ *     structure queued on so_q0 by calling sonewconn().  When the connection
+ *     is established, soisconnected() is called, and transfers the
+ *     socket structure to so_q, making it available to accept().
+ *
+ *     If a socket is closed with sockets on either so_q0 or so_q, these
+ *     sockets are dropped.
+ *
  * Locking rules and assumptions:
  *
  * o socket::so_lock can change on the fly.  The low level routines used
@@ -120,40 +151,9 @@
  *   domains.
  */
 
-static pool_cache_t socket_cache;
-
-u_long sb_max = SB_MAX;        /* maximum socket buffer size */
-static u_long sb_max_adj;      /* adjusted sb_max */
-
-/*
- * Procedures to manipulate state flags of socket
- * and do appropriate wakeups.  Normal sequence from the
- * active (originating) side is that soisconnecting() is
- * called during processing of connect() call,
- * resulting in an eventual call to soisconnected() if/when the
- * connection is established.  When the connection is torn down
- * soisdisconnecting() is called during processing of disconnect() call,
- * and soisdisconnected() is called when the connection to the peer
- * is totally severed.  The semantics of these routines are such that
- * connectionless protocols can call soisconnected() and soisdisconnected()
- * only, bypassing the in-progress calls when setting up a ``connection''
- * takes no time.
- *
- * From the passive side, a socket is created with
- * two queues of sockets: so_q0 for connections in progress
- * and so_q for connections already made and awaiting user acceptance.
- * As a protocol is preparing incoming connections, it creates a socket
- * structure queued on so_q0 by calling sonewconn().  When the connection
- * is established, soisconnected() is called, and transfers the
- * socket structure to so_q, making it available to accept().
- *
- * If a socket is closed with sockets on either
- * so_q0 or so_q, these sockets are dropped.
- *
- * If higher level protocols are implemented in
- * the kernel, the wakeups done here will sometimes
- * cause software-interrupt process scheduling.
- */
+static pool_cache_t    socket_cache;
+u_long                 sb_max = SB_MAX;/* maximum socket buffer size */
+static u_long          sb_max_adj;     /* adjusted sb_max */
 
 void
 soisconnecting(struct socket *so)
@@ -179,6 +179,10 @@
        so->so_state |= SS_ISCONNECTED;
        if (head && so->so_onq == &head->so_q0) {
                if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+                       /*
+                        * Re-enqueue and wake up any waiters, e.g.
+                        * processes blocking on accept().
+                        */
                        soqremque(so, 0);
                        soqinsque(head, so, 1);
                        sorwakeup(head);
@@ -242,6 +246,7 @@
  * properly linked into the data structure of the original socket.
  *
  * => Connection status may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
+ * => May be called from soft-interrupt context.
  * => Listening socket should be locked.
  * => Returns the new socket locked.
  */
@@ -269,7 +274,7 @@
                return NULL;
        }
        so->so_type = head->so_type;
-       so->so_options = head->so_options &~ SO_ACCEPTCONN;
+       so->so_options = head->so_options & ~SO_ACCEPTCONN;
        so->so_linger = head->so_linger;
        so->so_state = head->so_state | SS_NOFDREF;
        so->so_proto = head->so_proto;
@@ -430,29 +435,29 @@
 }
 
 /*
- * Socantsendmore indicates that no more data will be sent on the
+ * socantsendmore(): indicates that no more data will be sent on the
  * socket; it would normally be applied to a socket when the user
  * informs the system that no more data is to be sent, by the protocol
- * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
- * will be received, and will normally be applied to the socket by a
- * protocol when it detects that the peer will send no more data.
- * Data queued for reading in the socket may yet be read.
+ * code (in case PRU_SHUTDOWN).
  */
-
 void
 socantsendmore(struct socket *so)
 {
-
        KASSERT(solocked(so));
 
        so->so_state |= SS_CANTSENDMORE;
        sowwakeup(so);
 }
 
+/*
+ * socantrcvmore(): indicates that no more data will be received and
+ * will normally be applied to the socket by a protocol when it detects
+ * that the peer will send no more data.  Data queued for reading in
+ * the socket may yet be read.
+ */
 void
 socantrcvmore(struct socket *so)
 {
-
        KASSERT(solocked(so));
 
        so->so_state |= SS_CANTRCVMORE;
diff -r ae6c5239e717 -r 7aed2419406b sys/netinet/in_pcb.c
--- a/sys/netinet/in_pcb.c      Wed Aug 28 23:59:09 2013 +0000
+++ b/sys/netinet/in_pcb.c      Mon Sep 23 00:57:53 2013 +0000
@@ -1,4 +1,4 @@
-/*     $NetBSD: in_pcb.c,v 1.145.2.2 2013/08/28 15:21:48 rmind Exp $   */
+/*     $NetBSD: in_pcb.c,v 1.145.2.3 2013/09/23 00:57:53 rmind Exp $   */
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -96,32 +96,33 @@
  * Internet Protocol Control Block (PCB) module.
  *
  *     Each PCB (inpcb_t) is associated with a socket during PCB creation.
- *     Its members are protected by the socket lock.  Creation is done on
- *     PRU_ATTACH protocol command and destruction on PRU_DETACH.
+ *     Its members are protected by the socket lock.  Creation is done in
+ *     the pr_attach protocol method and destruction in pr_detach.
  *
  * Synchronisation
  *
  *     PCBs are inserted into a PCB table (inpcbtable_t).  The hash and
  *     the lists of the table are protected by the inpcbtable_t::inpt_lock.
  *     There are two main PCB lookup points, which can occur either from
- *     the top or the bottom of the stack:
+ *     the top or the bottom of the network stack:
  *
  *     - Process performs a protocol operation (e.g. PRU_SEND) and gets
  *       PCB from the socket, i.e. sotoinpcb(9).
  *     - When a packet arrives (e.g. UDP datagram), the protocol layer
- *       performs 4-tuple a PCB lookup to find an associated socket.
+ *       performs a PCB lookup to find an associated socket.
  *
  *     In addition to this, there are cases when multiple PCBs are matched
  *     and processed (e.g. raw IP or UDP multicast).
  *
- * Lock order, XXXrmind: NOT YET
+ * Lock order
  *
- *     inpcbtable_t::inpt_lock ->
- *             struct socket::so_lock
+ *     softnet_lock ->
+ *             struct socket::so_lock ->
+ *                     inpcbtable_t::inpt_lock
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in_pcb.c,v 1.145.2.2 2013/08/28 15:21:48 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in_pcb.c,v 1.145.2.3 2013/09/23 00:57:53 rmind Exp $");
 
 #include "opt_inet.h"
 #include "opt_ipsec.h"
@@ -175,6 +176,8 @@
 int    lowportmin  = IPPORT_RESERVEDMIN;
 int    lowportmax  = IPPORT_RESERVEDMAX;
 
+static void            inpcb_set_state1(inpcb_t *, int);
+
 #define        INPCBHASH_PORT(table, lport) \
        &(table)->inpt_porthashtbl[ntohs(lport) & (table)->inpt_porthash]
 #define        INPCBHASH_BIND(table, laddr, lport) \
@@ -231,7 +234,7 @@
 
        inp = pool_cache_get(inpcb_cache, PR_NOWAIT);
        if (inp == NULL) {
-               return ENOBUFS;
+               return ENOMEM;
        }
        memset(inp, 0, sizeof(*inp));
 
@@ -249,11 +252,13 @@
        }
 #endif
        so->so_pcb = inp;
-       inpcb_set_state(inp, INP_ATTACHED);
+       head = INPCBHASH_PORT(inpt, inp->inp_lport);
 
-       head = INPCBHASH_PORT(inpt, inp->inp_lport);
+       mutex_enter(&inpt->inpt_lock);
+       inpcb_set_state1(inp, INP_ATTACHED);
        CIRCLEQ_INSERT_HEAD(&inpt->inpt_queue, &inp->inp_head, inph_queue);
        LIST_INSERT_HEAD(head, &inp->inp_head, inph_lhash);
+       mutex_exit(&inpt->inpt_lock);
 
        return 0;
 }
@@ -393,7 +398,8 @@
 
                /* XXX-kauth */
                if (so->so_uidinfo->ui_uid && !IN_MULTICAST(sin->sin_addr.s_addr)) {
-                       t = inpcb_lookup_port(inpt, sin->sin_addr, sin->sin_port, 1, &vestige);
+                       t = inpcb_lookup_local(inpt, sin->sin_addr,
+                           sin->sin_port, 1, &vestige);
                        /*
                         * XXX: investigate ramifications of loosening this
                         *      restriction so that as long as both ports have
@@ -415,7 +421,8 @@
                                }
                        }
                }
-               t = inpcb_lookup_port(inpt, sin->sin_addr, sin->sin_port, wild, &vestige);
+               t = inpcb_lookup_local(inpt, sin->sin_addr, sin->sin_port,
+                   wild, &vestige);



Home | Main Index | Thread Index | Old Index