Subject: kern/17752: dead lock in sbxxxxx functions using local sockets
To: None <gnats-bugs@gnats.netbsd.org>
From: Christian Biere <christianbiere@gmx.de>
List: netbsd-bugs
Date: 07/28/2002 22:42:45
>Number:         17752
>Category:       kern
>Synopsis:       dead locks in sbxxxxx functions using local sockets
>Confidential:   yes
>Severity:       critical
>Priority:       high
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Sun Jul 28 22:43:00 PDT 2002
>Closed-Date:
>Last-Modified:
>Originator:     Christian Biere
>Release:        NetBSD 1.6D
>Organization:
        
>Environment:
System: NetBSD localhost 1.6D NetBSD 1.6D (DURON2) #0: Sat Jul 27 08:52:59 CEST 
2002 root@localhost:/usr/src/sys/arch/i386/compile/DURON2 i386
Architecture: i386
Machine: i386

>Description:

I am working on a little program which uses AF_INET and AF_LOCAL sockets.
In short, a server reads from a file, e.g. /dev/zero, and clients can connect
to receive chunks from this file. The aim is a distributor for a (very good)
random device. EOF and unrecoverable errors are not handled very well, but
this isn't critical. The problem is that ice-client runs into a deadlock
in some sbxxxxx functions, i.e. sbcompress (and sbdrop in a slightly modified
version). As I am not the original author and I haven't checked whether I
may treat the source as "open", I have chosen to mark this report confidential.
BTW, I have stripped much code which is not necessary to reproduce this bug.
Therefore the program does not really make much sense.

Regards,
Christian Biere

ice.h:

#if !defined(ICE_H)
#define ICE_H

/* common includes */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <stdarg.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <limits.h>
#include <fcntl.h>

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#include "config.h"

#define STRINGIFY(x) #x
#define XSTRINGIFY(x) STRINGIFY(x)

#define msg printf

/*
 * Switch a descriptor between blocking and non-blocking mode.
 *
 *   fd   - open file descriptor
 *   mode - non-zero: clear O_NONBLOCK (blocking);
 *          zero:     set O_NONBLOCK (non-blocking)
 *
 * Returns the result of fcntl(F_SETFL), or -1 if the current flags could
 * not be read. In the latter case errno still describes the F_GETFL
 * failure, so callers may use perror().
 */
int setblocking(int fd, int mode) {
  int flags;

  flags = fcntl(fd, F_GETFL);
  if (flags == -1) {
    /* printing may clobber errno; keep it for the caller */
    int saved_errno = errno;

    msg("fcntl F_GETFL failed: %s",
	    strerror(saved_errno));
    errno = saved_errno;
    /* do not feed bits derived from -1 to F_SETFL */
    return -1;
  }

  if (mode)
    return fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
  else
    return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}

/*
 * write() wrapper that hides EAGAIN and EINTR from the caller.
 *
 * The call is repeated for as long as it fails with one of those two
 * errors. Any other failure is logged and returned as-is. A result
 * >= 0 (possibly a short write) is returned immediately.
 */
ssize_t write_retry(int fd, const void *buf, size_t len) {
  ssize_t written;

  while ((written = write(fd, buf, len)) < 0) {
    if (errno == EAGAIN || errno == EINTR)
      continue;                 /* transient: try again */
    msg("write() failed: %s", strerror(errno));
    break;
  }

  return written;
}

/*
 * read() wrapper that hides EAGAIN and EINTR from the caller.
 *
 * The call is repeated for as long as it fails with one of those two
 * errors. Any other failure is logged and returned as-is. A result
 * >= 0 (including 0 for end-of-file, or a short read) is returned
 * immediately.
 */
ssize_t read_retry(int fd, void *buf, size_t len) {
  ssize_t got;

  while ((got = read(fd, buf, len)) < 0) {
    if (errno == EAGAIN || errno == EINTR)
      continue;                 /* transient: try again */
    msg("read() failed: %s", strerror(errno));
    break;
  }

  return got;
}

/*
 * do_write() doesn't return until all nbytes are written or
 * an unrecoverable error occurs.
 *
 * Returns nbytes on success; otherwise the (<= 0) result of the failing
 * write_retry() call.
 */
ssize_t do_write(int fd, const void *buf, size_t nbytes)
{
  /* byte pointer: arithmetic on `void *' is a GNU extension, not ISO C */
  const unsigned char *pos = buf;
  size_t nleft = nbytes;
  ssize_t res;

  while (nleft > 0) {
    res = write_retry(fd, pos, nleft);
    if (res <= 0) {
      msg("write_retry() failed: %s", strerror(errno));
      return res;
    }
    nleft -= (size_t)res;
    pos += res;
  }
  return (ssize_t)nbytes;
}

/*
 * do_read() doesn't return until all nbytes are read or
 * an unrecoverable error occurs.
 *
 * Returns nbytes on success; otherwise the (<= 0) result of the failing
 * read_retry() call (0 means end-of-file before nbytes were read).
 */
ssize_t do_read(int fd, void *buf, size_t nbytes)
{
  /* byte pointer: arithmetic on `void *' is a GNU extension, not ISO C */
  unsigned char *pos = buf;
  size_t nleft = nbytes;
  ssize_t res = 0;

  while (nleft > 0) {
    res = read_retry(fd, pos, nleft);
    if (res <= 0) {
      msg("read_retry failed: %s", strerror(errno));
      return res;
    }
    nleft -= (size_t)res;
    pos += res;
  }
  return (ssize_t)nbytes;
}

#endif /* ICE_H */

-----------

iced.c:

#include "ice.h"

#include <signal.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <limits.h>
#include <fcntl.h>
#include <pwd.h>

#ifndef DEV_NULL
#define DEV_NULL "/dev/null"
#endif

#define MAX_CHUNK_SIZE 1024

static unsigned chunk_size;
static int debug;

#define MAXFD   256
#define LISTENQ 5

/*
 * One entry per forked child; entries form a doubly linked list whose
 * head is the file-scope `slaveproc' pointer.
 */
typedef struct slaveproc Slaveproc;
struct slaveproc {
  pid_t      pid;			/* pid of the child, as returned by fork() */
  char       peer[22];			/* 255.255.255.255/65535\0 */
  int        fd;			/* parent's end of the socketpair to the child */
  int        amount;			/* bytes of entropy handed to this child so far */
  /*
   * set from the SIGCHLD handler, read by the main loop:
   * sig_atomic_t is the type ISO C provides for objects shared with
   * a signal handler
   */
  volatile sig_atomic_t dead;
  int        deaf;			/* this is 1, when write() failed */
  int        status;			/* status value logged when the entry is removed */
  Slaveproc *next;
  Slaveproc *prev;
};

static Slaveproc *slaveproc = NULL;

/*
 * print usage on stderr and exit
 *
 * Does not return.
 */
void usage(void) {
  fputs("Usage: iced [-d] [-e] [-f <facility>] [-l <logfile>] "
        "[-p <port>] [-s <n>] [-u <user>] [-v] [entropy source]\n", stderr);
  fputs("Options:\n", stderr);
  fputs("  -p : Listen on <port>. Default is "
        XSTRINGIFY(DEFAULT_PORT) ".\n", stderr);
  fputs("  -s : Write <n> bytes at once to clients. Default is 16.\n", stderr);
  fputs("  -v : Print debugging messages.\n\n", stderr);
  /*
   * adjacent string literals are concatenated verbatim: the trailing
   * blank after "no" is required, otherwise "noentropy" is printed
   */
  fputs("The optional argument specifies the entropy source. If no "
        "entropy source is\nspecified, `"
        ENTROPY_SOURCE
        "' is used.\n", stderr);
  exit(EXIT_SUCCESS);
}

/*
 * Add SIGCHLD to (block != 0) or remove it from (block == 0) the signal
 * mask of the process. Used to protect operations on the child list.
 * A failing sigprocmask() is reported with perror() only.
 */
void block_sigchld(int block) {
  sigset_t chld_only;
  const int how = block ? SIG_BLOCK : SIG_UNBLOCK;

  sigemptyset(&chld_only);
  sigaddset(&chld_only, SIGCHLD);

  if (sigprocmask(how, &chld_only, (sigset_t *)NULL) >= 0)
    return;

  perror(block ? "failed to block SIGCHLD" : "failed to unblock SIGCHLD");
}

void sig_sigchld(int signo) {
  pid_t pid;
  int stat;
  Slaveproc *proc;

  pid = wait(&stat);

  /*
   * find child in list and mark it as dead
   *
   * IMPORTANT! block SIGCHLD while operating on the slaveproc list!
   */
  proc = slaveproc;
  while (proc != NULL && proc->pid != pid)
    proc = proc->next;

  if (proc != NULL)
    proc->dead = 1;
}

/*
 * Pass-through filter for I/O results: a value of -1 terminates the
 * process with EXIT_SUCCESS (used when a client has closed the
 * connection), every other value is handed back unchanged.
 */
ssize_t exit_on_error(ssize_t res) {
  if (res != -1)
    return res;

  exit(EXIT_SUCCESS);
}

/*
 * Request loop run by a child process for one client connection.
 *
 *   in_fd       - child's end of the socketpair to the parent
 *   client      - connected client socket
 *   client_addr - printable peer address, used in log messages only
 *
 * Each client request is a command byte followed by one argument byte:
 *   0x01 `read n'  - one chunk of min(n, chunk_size) bytes
 *   0x02 `readb n' - chunks until n bytes have been delivered
 * For every chunk the wanted size is written to the parent as a u_long
 * passed through htonl(), the data is read back from in_fd and forwarded
 * to the client as <length byte><data>.
 *
 * Never returns: the process exits on end-of-file from the client, on an
 * invalid command, and when a command or its argument cannot be read.
 */
void serve_entropy(int in_fd, int client, char *client_addr) {
  unsigned char buf[MAX_CHUNK_SIZE];
  ssize_t res;
  ssize_t written;
  unsigned char cmd = 0, arg = 0;
  u_long amount;

  for (;;) {
    res = read_retry(client, &cmd, 1);
    if (res == 0)
      exit(EXIT_SUCCESS);
    if (res != 1) {
      /* cmd holds no fresh value; dispatching on it would be bogus */
      msg("failed to read next command from client %.21s: %s\n",
          client_addr, strerror(errno));
      exit(EXIT_FAILURE);
    }

    switch (cmd) {
      case 0x01:
      case 0x02:
        res = read_retry(client, &arg, 1);
        if (res != 1) {
          /* same reasoning as above: arg would be stale */
          msg("failed to read command argument from client %.21s: %s\n",
              client_addr, strerror(errno));
          exit(EXIT_FAILURE);
        }
        switch (cmd) {
          case 0x01:
            msg("client %.21s sent `read %d'\n",
                client_addr, (int)arg);
            arg = arg < chunk_size ? arg : chunk_size;
            amount = htonl((u_long)arg);
            res = exit_on_error(do_write(in_fd, &amount, sizeof(amount)));
            /* first byte of the reply is the length of the data */
            buf[0] = res = do_read(in_fd, buf + 1, arg);
            written = do_write(client, buf, res + 1);
            if (written != res + 1)
              msg("failed to write entropy to client %.21s: %s\n",
                  client_addr, strerror(errno));
            break;
          case 0x02:
            msg("client %.21s sent `readb %u'\n",
                client_addr, (unsigned)arg);
            /* full chunks first ... */
            while (arg >= chunk_size) {
              amount = htonl((u_long)chunk_size);
              exit_on_error(do_write(in_fd, &amount, sizeof(amount)));
              buf[0] = res = exit_on_error(read_retry(in_fd, buf + 1, arg));
              written = do_write(client, buf, res + 1);
              if (written != res + 1)
                msg("failed to write entropy to client %.21s: %s\n",
                    client_addr, strerror(errno));
              arg -= res;
            }
            /* ... then whatever is left */
            if (arg > 0) {
              amount = htonl((u_long)arg);
              exit_on_error(do_write(in_fd, &amount, sizeof(amount)));
              buf[0] = res = exit_on_error(read_retry(in_fd, buf + 1, arg));
              written = do_write(client, buf, res + 1);
              if (written != res + 1)
                msg("failed to write entropy to client %.21s: %s\n",
                    client_addr, strerror(errno));
            }
            break;
        }
        break;
      default:
        msg("client  %.21s sent invalid command\n", client_addr);
        exit(EXIT_SUCCESS);
        /* not reached */
    } /* switch */
  } /* for (;;) */
}

/*
 * iced: entropy distribution server.
 *
 * Parses the options, opens the entropy source and a listening TCP
 * socket, then loops forever:
 *   - list entries of dead children are removed,
 *   - select() waits for a new connection or a request from a child,
 *   - every accepted connection gets its own child process, connected to
 *     the parent through an AF_LOCAL socketpair,
 *   - of all children with a pending request, the one that has received
 *     the least entropy so far is served.
 *
 * Start-up failures and invalid option values terminate the program
 * with EXIT_FAILURE.
 */
int main(int argc, char *argv[]) {
  pid_t pid;
  int listenfd, entropy_fd;
  struct sockaddr_in servaddr;
  int ch;	/* int, not char: getopt() signals the end with -1 */
  char *entropy_source = NULL;
  int on = 1;
  unsigned port;

  debug = 0;
  chunk_size = 16;
  port = DEFAULT_PORT;

  while ((ch = getopt(argc, argv, "def:l:p:s:u:v")) != -1)
    switch (ch) {
      case 'p':
        port = atoi(optarg);
        if (port == 0) {
          msg("invalid port identifier: %.32s\n", optarg);
          exit(EXIT_FAILURE);
        }
        break;
      case 's':
        chunk_size = atoi(optarg);
        /*
         * must be enforced: the transfer buffers below hold at most
         * MAX_CHUNK_SIZE bytes
         */
        if (chunk_size == 0 || chunk_size > MAX_CHUNK_SIZE) {
          msg("invalid block size; valid range is 1...%d\n",
              MAX_CHUNK_SIZE);
          exit(EXIT_FAILURE);
        }
        break;
      case 'v':
        debug = 1;
        break;
      case '?':
      default:
        usage();
    }
  argc -= optind;
  argv += optind;

  if (port == 0 || port > 65535) { /* don't even trust the default setting */
    msg("invalid port identifier: %u\n", port);
    exit(EXIT_FAILURE);
  }

  if (argc == 1)
    entropy_source = argv[0];
  else if (argc == 0)
    entropy_source = ENTROPY_SOURCE;
  else
    usage();

  /*
   * open entropy source
   */
  msg("using %.128s as entropy source\n", entropy_source);
  entropy_fd = open(entropy_source, O_RDONLY);
  if (entropy_fd < 0) {
    perror("could not open entropy source");
    exit(EXIT_FAILURE);
  }

  /*
   * obtain a server socket
   */
  listenfd = socket(AF_INET, SOCK_STREAM, 0);
  if (listenfd < 0) {
    perror("could not create socket");
    exit(EXIT_FAILURE);
  }
  if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR,
                 (char *)&on, sizeof(on)) < 0)
    perror("could not set socket option");	/* not fatal */

  bzero(&servaddr, sizeof(servaddr));
  servaddr.sin_family      = AF_INET;
  servaddr.sin_addr.s_addr = htonl(INADDR_ANY);
  servaddr.sin_port        = htons((u_short)port);

  if (bind(listenfd, (struct sockaddr *) &servaddr, sizeof(servaddr)) < 0) {
    perror("could not bind to address");
    exit(EXIT_FAILURE);
  }

  if (listen(listenfd, LISTENQ) < 0) {
    perror("could not listen on socket");
    exit(EXIT_FAILURE);
  }

  /*
   * set listenfd to non-blocking since we perform select() on it
   *   see: Stevens `Unix Network Programming' Vol.1 Sect. 15.6
   */
  if (setblocking(listenfd, 0) == -1)
    perror("failed to set listen socket to non-blocking");

  signal(SIGCHLD, sig_sigchld);
  signal(SIGPIPE, SIG_IGN);

  for (;;) {
    int connfd, maxfd;
    socklen_t len;
    struct sockaddr_in cliaddr;
    int fd[2];
    Slaveproc *proc, *min_amount_proc;
    fd_set readset;
    struct timeval timeout;
    int res, min_amount;

    timeout.tv_sec = 5;
    timeout.tv_usec = 0;

    FD_ZERO(&readset);

    FD_SET(listenfd, &readset);
    maxfd = listenfd;
    if (maxfd < entropy_fd)
      maxfd = entropy_fd;

    proc = slaveproc;
    while (proc != NULL) {

      /*
       * look for dead children
       */
      if (proc->dead) {
        Slaveproc *tmp_proc;

        close(proc->fd);
        msg("peer %.21s got %d bytes of entropy (status=%d)",
            proc->peer, proc->amount, proc->status);

        /*
         * this list is used in the SIGCHLD signal handler (read only);
         * block SIGCHLD while we delete an item!
         */
        block_sigchld(1);
        if (proc->next != NULL)
          proc->next->prev = proc->prev;
        if (proc->prev != NULL)
          proc->prev->next = proc->next;
        else
          slaveproc = proc->next;
        block_sigchld(0);

        tmp_proc = proc;
        proc = proc->next;
        free(tmp_proc);
        continue;
      }

      /*
       * the last write to this child failed; probably the client
       * has closed the connection, the child has terminated, we just
       * do not have a SIGCHLD for it.
       */
      if (proc->deaf) {
        /* step to the next entry, or this loop never terminates */
        proc = proc->next;
        continue;
      }

      FD_SET(proc->fd, &readset);
      if (maxfd < proc->fd)
        maxfd = proc->fd;

      proc = proc->next;
    }

    res = select(maxfd + 1, &readset, NULL, NULL, &timeout);

    if (res == 0) /* nothing to do */
      continue;

    if (res < 0) { /* check for error */
      if (errno != EINTR)
        perror("select() returned an error");
      sleep(1);
      continue;
    }

    if (FD_ISSET(listenfd, &readset)) {
      char client_addr[256];
      char buf[256];

      len = sizeof(cliaddr);
      connfd = accept(listenfd, (struct sockaddr *) &cliaddr, &len);
      if (connfd < 0) {
        if (errno == EINTR || errno == EAGAIN) /* no error */
          continue;
        perror("could not accept connection");
        sleep(1);
        continue;
      }

      sprintf(client_addr, "%.15s/%d",
              inet_ntop(AF_INET, &(cliaddr.sin_addr), buf, sizeof(buf)),
              ntohs(cliaddr.sin_port));

      msg("connection from %.32s\n", client_addr);

      if (setblocking(connfd, 1) == -1) {
        perror("failed to set socket to blocking, closing connection");
        close(connfd);
        sleep(1);
        continue;
      }

      /*
       * create unix domain socket pair: parent <-> child
       */
      if (socketpair(AF_LOCAL, SOCK_STREAM, 0, fd) < 0) {
        perror("could not create socket pair, closing connection");
        close(connfd);
        sleep(1);
        continue;
      }

      /*
       * chain a new child struct to the head of the doubly linked list
       */
      proc = malloc(sizeof(Slaveproc));
      if (proc == NULL) {
        /*
         * poor child, we can't afford it
         */
        perror("failed to allocate memory");
        /* release everything acquired for this connection */
        close(connfd);
        close(fd[0]);
        close(fd[1]);
        sleep(1);
        continue;
      }
      strncpy(proc->peer, client_addr, sizeof(proc->peer));
      proc->peer[sizeof(proc->peer) - 1] = '\0';
      proc->fd     = fd[1];
      proc->amount = 0;
      proc->dead   = 0;
      proc->deaf   = 0;
      proc->status = 0;
      proc->next   = slaveproc;
      proc->prev   = NULL;

      /*
       * block SIGCHLD to squish a race condition (the child may die
       * before the slaveproc list is updated
       */
      block_sigchld(1);
      if ((pid = fork()) == (pid_t)-1) {
        perror("fork() failed, closing connection");
        close(connfd);
        /* the socketpair is of no use without a child */
        close(fd[0]);
        close(fd[1]);
        free(proc);
        block_sigchld(0);
        sleep(1);
        continue;
      }

      if (pid != 0) {
        /* parent: keep fd[1] only and publish the new list entry */
        close(connfd);
        close(fd[0]);

        proc->pid    = pid;

        if (proc->next != NULL)
          proc->next->prev = proc;
        slaveproc = proc;
        block_sigchld(0);
        continue;
      } else {
        /* child: keep connfd and fd[0] only */
        close(listenfd);
        close(entropy_fd);
        close(fd[1]);
        block_sigchld(0);
        signal(SIGCHLD, SIG_IGN);
        serve_entropy(fd[0], connfd, client_addr);
        close(connfd);
        exit(EXIT_SUCCESS);
      }
    } /* if (FD_ISSET(listenfd, &readset)) */

    /*
     * check for children that are ready to read entropy.
     * perform a simple scheduling:
     * out of all children that are ready to read entropy select
     * the one with the least amount of already consumed entropy
     */
    for (proc = slaveproc, min_amount = INT_MAX, min_amount_proc = NULL;
         proc != NULL;
         proc = proc->next)
      if (FD_ISSET(proc->fd, &readset) &&
          proc->fd <= maxfd && proc->amount < min_amount) {
        min_amount_proc = proc;
        min_amount = proc->amount;
      }

    if (min_amount_proc != NULL) {

      unsigned char buf[MAX_CHUNK_SIZE];
      ssize_t in, out;
      u_long amount;

      /*
       * check how much entropy the slave wants
       */
      in = do_read(min_amount_proc->fd, &amount, sizeof(amount));
      if (in == 0) {
        /* slave closed the connection */
        min_amount_proc->dead = 1;
        continue;
      }
      if (in != (ssize_t)sizeof(amount)) {
        /* `amount' holds no valid request; give up on this child */
        perror("failed to read block size from slave");
        min_amount_proc->dead = 1;
        continue;
      }
      amount = ntohl((u_long)amount);

      msg("child %u requested %u bytes of entropy\n",
          (unsigned)min_amount_proc->pid, (unsigned)amount);

      if (amount > chunk_size) {
        msg("child %u requested %u bytes, but chunk size is %u\n",
            (unsigned)min_amount_proc->pid, (unsigned)amount, chunk_size);
        amount = chunk_size;
      }

      /*
       * get amount bytes of entropy
       */
      in = do_read(entropy_fd, buf, amount);
      if (in < 0)
        perror("failed to read from entropy source");

      msg("got %ld bytes of entropy from source\n", (long)in);

      if (in != (ssize_t)amount) {
        msg("wanted %u bytes of entropy, got %ld bytes\n",
            (unsigned)amount, (long)in);
        /*
         * buf is not (completely) filled; never hand uninitialized
         * stack contents to a client as if it were entropy
         */
        min_amount_proc->dead = 1;
        continue;
      }

      out = write_retry(min_amount_proc->fd, buf, amount);
      if (out < 0) {
        if (errno == EPIPE)
          min_amount_proc->deaf = 1;
        else
          perror("failed to write to child");
      }

      msg("wrote %ld bytes of entropy to child\n", (long)out);
      min_amount_proc->amount += amount;
    }

  }

  /* not reached */
  exit(EXIT_SUCCESS);
}

----------

ice-client.c:

#include "ice.h"

#include <netdb.h>

/*
 * Resolve hostname and connect to it on the given TCP port.
 *
 * Every address returned by the resolver is tried in turn. A fresh
 * socket is created for each attempt, because the descriptor of a
 * failed attempt is closed and must not be reused.
 *
 * Returns the connected socket, or -1 if the host cannot be resolved,
 * no socket can be created, or no address accepts the connection.
 */
int ice_connect(const char *hostname, unsigned port) {
  struct sockaddr_in servaddr;
  struct hostent *hptr;
  struct in_addr **pptr;
  int fd;

  if ((hptr = gethostbyname(hostname)) == NULL) {
    fprintf(stderr, "can't resolve host %s.\n", hostname);
    return -1;
  }

  /* NOTE(review): assumes the resolver returned AF_INET addresses */
  for (pptr = (struct in_addr **) hptr->h_addr_list; *pptr != NULL; pptr++) {
    if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
      perror("socket() failed");
      return -1;
    }

    bzero(&servaddr, sizeof(servaddr));
    servaddr.sin_family = AF_INET;
    servaddr.sin_port = htons((unsigned short)port);
    memcpy(&servaddr.sin_addr, *pptr, sizeof(struct in_addr));
    fprintf(stderr, "Trying %s:%u...\n", inet_ntoa(servaddr.sin_addr), port);

    if (connect(fd, (struct sockaddr*) &servaddr, sizeof(servaddr)) == 0)
      return fd; /* success */

    fprintf(stderr, "can't connect to `%s:%u': %s\n", hostname,
            port, strerror(errno));
    close(fd);
  }

  return -1;
}

/*
 * Request loop of the test client: keep sending `read n' commands
 * (command byte 0x01 followed by a random length byte) and read one
 * byte of each reply.
 *
 * Returns as soon as a request cannot be sent or no reply byte can be
 * read, instead of hammering a broken connection.
 *
 * NOTE(review): only the first byte of a reply is consumed, the rest is
 * never read. This looks deliberate for the stripped-down reproducer
 * and is left as it is - confirm before "fixing".
 */
void main_loop(int fd) {
  unsigned char cmd = 0x01, amount;

  for (;;) {
    amount = random() & 255;
    if (do_write(fd, &cmd, 1) != 1)
      return;
    if (do_write(fd, &amount, 1) != 1)
      return;
    if (do_read(fd, &amount, 1) != 1)
      return;
  }
}

/*
 * ice-client HOSTNAME PORT
 *
 * Connects to an iced server and issues requests until the connection
 * fails. PORT must be a decimal number in the range 1...65535.
 */
int main(int argc, char *argv[]) {
  int fd;
  unsigned long port;
  char *end;

  if (argc < 3) {
    fprintf(stderr, "usage: %s HOSTNAME PORT\n", argv[0]);
    exit(EXIT_SUCCESS);
  }

  /* strtoul() instead of atoi(): detects garbage and out-of-range input */
  errno = 0;
  port = strtoul(argv[2], &end, 10);
  if (errno != 0 || end == argv[2] || *end != '\0' ||
      port == 0 || port > 65535) {
    fprintf(stderr, "invalid port: %s\n", argv[2]);
    exit(EXIT_FAILURE);
  }

  if ((fd = ice_connect(argv[1], (unsigned)port)) == -1)
    exit(EXIT_FAILURE);

  main_loop(fd);
  exit(EXIT_SUCCESS);
}

----------------

config.h:

#define DEFAULT_PORT 12345
#define ENTROPY_SOURCE "/dev/urandom"

-----------------

Makefile:

# Builds the iced server and the ice-client test client.
CC	= gcc
# NOTE: $(DEBUG) is expanded after "-g -O2" in CFLAGS below. With gcc the
# last -O option given is the one that takes effect, so the programs are
# built with -O, not -O2.
DEBUG	= -Wall -O -g

CFLAGS	= -g -O2  $(DEBUG) -DHAVE_CONFIG_H
LDFLAGS =
LIBS	=

# these targets do not name files
.PHONY: all clean

all: iced ice-client

# ice.h includes config.h, so both programs depend on it as well
iced: iced.c ice.h config.h
	$(CC) $(CFLAGS) $(LDFLAGS) -o iced iced.c $(LIBS)

ice-client: ice-client.c ice.h config.h
	$(CC) $(CFLAGS) $(LDFLAGS) -o ice-client ice-client.c $(LIBS)

clean:
	rm -f *.o iced ice-client core

------------------
        
>How-To-Repeat:

Compile the included source, then start the server and a few clients, e.g. 6.
After a few seconds the system should be locked up. Escape to DDB and see
what "tr" says.

$ make
$ ./iced -s 512 &
$ ./ice-client localhost 12345 &
$ ./ice-client localhost 12345 &
$ ./ice-client localhost 12345 &
$ ./ice-client localhost 12345 &
$ ./ice-client localhost 12345 &
$ ./ice-client localhost 12345 &
        
>Fix:
>Release-Note:
>Audit-Trail:
>Unformatted: