Subject: kern/17517: poll() doesn't handle 3+ way collisions
To: None <gnats-bugs@gnats.netbsd.org>
From: None <dsl@l8s.co.uk>
List: netbsd-bugs
Date: 07/08/2002 14:10:38
>Number:         17517
>Category:       kern
>Synopsis:       poll() doesn't handle 3+ way collisions
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Mon Jul 08 06:09:12 PDT 2002
>Closed-Date:
>Last-Modified:
>Originator:     David Laight
>Release:        NetBSD 1.6B July 4 2002
>Organization:
none
>Environment:
System: NetBSD snowdrop 1.6B NetBSD 1.6B (GENERIC) #5: Mon Jul 8 13:17:37 BST 2002
dsl@snowdrop:/oldroot/usr/bsd-current/src/sys/arch/i386/compile/GENERIC i386
Architecture: i386
Machine: i386
>Description:
	If two processes are blocked in poll() on the same file,
	then one of them times out,
	then another poll request comes in for the same file,
	and a wakeup event happens
	then the second process isn't woken up.
>How-To-Repeat:
	Run the program below:
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>

#define nelem(x) (sizeof (x) / sizeof *(x))

typedef struct cmd_t {
    int	delay;
    int tmo;
} cmd_t;

static int
child( int id, int ctl_fd, int data_fd )
{
    cmd_t cmd;
    char ch;

    struct pollfd pfd;
    while (read( ctl_fd, &cmd, sizeof cmd ) == sizeof cmd) {
	printf( "child %d: delay %d\n", id, cmd.delay );
	sleep( cmd.delay );
	printf( "child %d: poll %d\n", id, cmd.tmo );
	pfd.fd = data_fd;
	pfd.events = POLLIN;
	switch (poll( &pfd, 1, cmd.tmo )) {
	case 0:
	    printf( "child %d: poll timedout\n", id );
	    break;
	case 1:
	    if (read( data_fd, &ch, 1 ) == 1)
		printf( "child %d: got data %c\n", id, ch );
	    else
		printf( "child %d: no data available\n", id );
	    break;
	default:
	    printf( "child %d: poll failed\n", id );
	}
    }
    printf( "child %d: exiting\n", id );
    return 0;
}

static void
send_command( int fd, int delay, int tmo )
{
    cmd_t cmd;
    cmd.delay = delay;
    cmd.tmo = tmo;
    write( fd, &cmd, sizeof cmd );
}

int
main( int argc, char **argv )
{
    int pipes[ 8 ];
    int i, c;
    char dump[4];

    setlinebuf( stdout );

    pipe( pipes );
    pipe( pipes + 2 );
    pipe( pipes + 4 );
    pipe( pipes + 6 );

    fcntl( pipes[0], F_SETFL, fcntl( pipes[0], F_GETFL, 0 ) | O_NONBLOCK );

    for (c = 1; c <= 3; c++) {
	if (!fork()) {
	    for (i = 1; i < nelem( pipes ); i += 2) {
		close( pipes[ i ] );
	    }
	    return child( c, pipes[ c * 2 ], pipes[ 0 ] );
	}
    }

    send_command( pipes[3], 0, 2000 );
    send_command( pipes[5], 1, 10000 );
    send_command( pipes[7], 3, 10000 );

    sleep( 5 );
    printf( "parent: writing 2 bytes...\n" );
    write( pipes[ 1 ], "ab", 2 );
    sleep( 10 );
    if (read( pipes[0], dump, sizeof dump ) > 0)
	printf( "parent: data left on pipe\n" );

    return 0;
}

>Fix:
	Don't clear SI_COLL until a wakeup event happens.
	(The change to selwakeup is a gratuitous optimisation)

Index: sys_generic.c
===================================================================
RCS file: /cvsroot/syssrc/sys/kern/sys_generic.c,v
retrieving revision 1.62
diff -u -r1.62 sys_generic.c
--- sys_generic.c	2002/03/22 18:58:59	1.62
+++ sys_generic.c	2002/07/08 13:04:31
@@ -929,13 +929,11 @@
 	mypid = selector->p_pid;
 	if (sip->si_pid == mypid)
 		return;
-	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
-	    p->p_wchan == (caddr_t)&selwait)
+	if (sip->si_pid && !(sip->si_flags & SI_COLL)
+	    && (p = pfind(sip->si_pid))
+	    && p->p_wchan == (caddr_t)&selwait)
 		sip->si_flags |= SI_COLL;
-	else {
-		sip->si_flags &= ~SI_COLL;
-		sip->si_pid = mypid;
-	}
+	sip->si_pid = mypid;
 }
 
 /*
@@ -951,9 +949,11 @@
 	if (sip->si_pid == 0)
 		return;
 	if (sip->si_flags & SI_COLL) {
+		sip->si_pid = 0;
 		nselcoll++;
 		sip->si_flags &= ~SI_COLL;
 		wakeup((caddr_t)&selwait);
+		return;
 	}
 	p = pfind(sip->si_pid);
 	sip->si_pid = 0;
>Release-Note:
>Audit-Trail:
>Unformatted: