NetBSD-Bugs archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

kern/43625: signal posting problem



>Number:         43625
>Category:       kern
>Synopsis:       signal posting problem
>Confidential:   yes
>Severity:       serious
>Priority:       high
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Fri Jul 16 01:20:00 +0000 2010
>Originator:     Mark Davies
>Release:        NetBSD 5.0_STABLE
>Organization:
ECS, Victoria Uni. of Wellington, New Zealand.
>Environment:
        
        
System: NetBSD city-art.ecs.vuw.ac.nz 5.0_STABLE NetBSD 5.0_STABLE 
(ECS_WORKSTATION) #7: Sun Feb 28 09:13:18 NZDT 2010 
mark%turakirae.ecs.vuw.ac.nz@localhost:/local/SAVE/build.obj/src/work/5/src/sys/arch/i386/compile/ECS_WORKSTATION
 i386
Architecture: i386
Machine: i386
>Description:
        On 5.0_STABLE/i386 and 5.1_RC3/i386 systems the cnid_metad process from 
netatalk is failing to reap its
        children.  The relevant code is below (trimming out the functional bits)

static volatile sig_atomic_t sigchild = 0;

struct server {
    char  *name;
    pid_t pid;
    int control_fd;               /* file descriptor to child cnid_dbd 
process */
};

static struct server srv[MAXVOLS];


/* -------------------- */
static int maybe_start_dbd(char *dbdpn, char *dbdir, char *usockfn)
{
    pid_t pid;
    struct server *up;

    up = test_usockfn(dbdir);
    if (up && up->pid) {
        /* we already have a process, send our fd */
        if (send_cred(up->control_fd, rqstfd) < 0) {
            /* FIXME */
            return -1;
        }
        return 0;
    }

 
    if ((pid = fork()) < 0) {
        return -1;
    }
    if (pid == 0) {
         ret = execlp(dbdpn, dbdpn, dbdir, buf1, buf2, logconfig, NULL);
         exit(0);
     }
    /*
     *  Parent.
     */
    up->pid = pid;
    return 0;
}

/* ------------------ */
static void catch_child(int sig _U_) 
{
    sigchild = 1;
}

/* ----------------------- */
static void set_signal(void)
{
    struct sigaction sv;
    sigset_t set;

    signal(SIGPIPE, SIG_IGN);

    sv.sa_handler = catch_child;
    sv.sa_flags = SA_NOCLDSTOP;
    sigemptyset(&sv.sa_mask);
    if (sigaction(SIGCHLD, &sv, NULL) < 0) {
        LOG(log_error, logtype_cnid, "cnid_metad: sigaction: %s", 
strerror(errno));
        exit(1);
    }
    /* block everywhere but in pselect */
    sigemptyset(&set);
    sigaddset(&set, SIGCHLD);
    sigprocmask(SIG_BLOCK, &set, NULL);
}

int usockfd_check(int sockfd, const sigset_t *sigset)
{
    int fd;
    socklen_t size;
    fd_set readfds;
    int ret;
    struct timeval tv;
     
    FD_ZERO(&readfds);
    FD_SET(sockfd, &readfds);

    if ((ret = pselect(sockfd + 1, &readfds, NULL, NULL, NULL, sigset)) < 
0) {
        if (errno == EINTR)
            return 0;
        return -1;
    }
}

/* ------------------ */
int main(int argc, char *argv[])
{
    pid_t pid;
    int   status;
    int    ret;
    sigset_t set;


    set_signal();

    sigemptyset(&set);
    sigprocmask(SIG_SETMASK, NULL, &set);
    sigdelset(&set, SIGCHLD);

    while (1) {
        rqstfd = usockfd_check(srvfd, &set);
        /* Collect zombie processes and log what happened to them */
        if (sigchild) while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
            for (i = 0; i < MAXVOLS; i++) {
                if (srv[i].pid == pid) {
                    srv[i].pid = 0;
                    break;
                }
            }
            if (WIFEXITED(status)) {
                LOG(log_info, logtype_cnid, "cnid_dbd pid %i exited with 
exit code %i",
                    pid, WEXITSTATUS(status));
            }
            else if (WIFSIGNALED(status)) {
                LOG(log_info, logtype_cnid, "cnid_dbd pid %i exited with 
signal %i",
                    pid, WTERMSIG(status));
            }
            sigchild = 0;
        }
        if (rqstfd <= 0)
            continue;

        maybe_start_dbd(dbdpn, dbdir, dbp->usock_file);

    }
}

        If you remove the  "if (sigchild)" prior to the "while ((pid = 
waitpid..." it does reap the
        children so the pselect() is being interrupted but the sigchld signal 
is not being posted to
        the process. as seen by this ktrace fragment

        >               [...]
        > 15830      1 cnid_dbd CALL  fcntl(2,8,0xbfbfedf4)
        > 15830      1 cnid_dbd RET   fcntl 0
        > 15830      1 cnid_dbd CALL  close(2)
        > 15830      1 cnid_dbd RET   close 0
        > 15830      1 cnid_dbd CALL  exit(0)
        >   460      1 cnid_metad RET   pselect -1 errno 4 Interrupted system 
        >call
        >   460      1 cnid_metad CALL  wait4(0xffffffff,0xbfbfee6c,1,0)
        >   460      1 cnid_metad RET   wait4 15830/0x3dd6
        >   460      1 cnid_metad CALL  close(4)
        >   460      1 cnid_metad RET   close 0
        >   460      1 cnid_metad CALL  wait4(0xffffffff,0xbfbfee6c,1,0)
        >   460      1 cnid_metad RET   wait4 0
        >   460      1 cnid_metad CALL  pselect(1,0xbfbfe8f4,0,0,0,0xbfbfee38)

        
>How-To-Repeat:
        Try to run cnid_metad
        
>Fix:
        

>Unformatted:
        
        


Home | Main Index | Thread Index | Old Index