NetBSD-Bugs archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
kern/59578: open() hangs indefinitely on FIFO with frequent signal interruption
>Number: 59578
>Category: kern
>Synopsis: open() hangs indefinitely on FIFO with frequent signal interruption
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: kern-bug-people
>State: open
>Class: sw-bug
>Submitter-Id: net
>Arrival-Date: Wed Aug 06 19:00:00 +0000 2025
>Originator: Furkan Onder
>Release: NetBSD 10.0
>Organization:
CPython
>Environment:
NetBSD home.localhost 10.0 NetBSD 10.0 (GENERIC) #0: Thu Mar 28 08:33:33 UTC 2024 mkrepro%mkrepro.NetBSD.org@localhost:/usr/src/sys/arch/amd64/compile/GENERIC amd64
>Description:
This issue was exposed through the CPython test suite, specifically the `test_eintr` module's `SocketEINTRTest.test_os_open` test,
which hangs indefinitely on NetBSD 10.0 (see https://github.com/python/cpython/issues/137397).
The underlying problem is that when attempting to open a FIFO for writing (O_WRONLY) while the process receives frequent signals,
the open() system call gets repeatedly interrupted with EINTR and never completes successfully, even when a reader process is
available on the other end.
This creates an infinite loop where the process hangs indefinitely, continuously retrying the open() call which always returns EINTR.
>How-To-Repeat:
Save the following C program as `reproducer.c`:
```
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
/* Counter incremented by handle_signal() each time the handler runs. */
volatile sig_atomic_t signal_count = 0;
/**
 * Signal handler installed for SIGALRM: records one more delivered signal.
 *
 * @param sig Signal number; not used by the handler.
 */
void handle_signal(int sig) {
    signal_count += 1;
    (void)sig; /* silence the unused-parameter warning */
}
/**
 * open() wrapper that keeps retrying while the call fails with EINTR.
 *
 * Writes a single "." to standard output for every interrupted attempt.
 * Any other failure is reported with perror() and terminates the process.
 *
 * @param path  Path of the file to open.
 * @param flags Flags passed unchanged to open().
 * @return The descriptor returned by the first successful open().
 */
int safe_open(const char *path, int flags) {
    for (;;) {
        int fd = open(path, flags);
        if (fd >= 0) {
            return fd;
        }
        if (errno != EINTR) {
            perror("open");
            exit(EXIT_FAILURE);
        }
        /* Interrupted by a signal: leave a progress mark and try again. */
        write(STDOUT_FILENO, ".", 1);
    }
}
/**
 * close() wrapper that keeps retrying while the call fails with EINTR.
 *
 * Writes a single "C" to standard output for every interrupted attempt.
 * Any other failure is reported with perror() and returned to the caller.
 *
 * NOTE(review): POSIX leaves the state of the descriptor unspecified when
 * close() fails with EINTR, so the retry may see EBADF on some systems --
 * confirm the behaviour on the target platform.
 *
 * @param fd Descriptor to close.
 * @return The result of the last close() call (0 on success, negative on
 *         a non-EINTR failure).
 */
int safe_close(int fd) {
    int rc = close(fd);
    while (rc < 0) {
        if (errno != EINTR) {
            perror("close");
            break;
        }
        /* Interrupted by a signal: leave a progress mark and try again. */
        write(STDOUT_FILENO, "C", 1);
        rc = close(fd);
    }
    return rc;
}
/**
 * Request a sleep of the given number of milliseconds via nanosleep().
 *
 * The nanosleep() result is ignored and the call is made only once, so no
 * attempt is made to resume the sleep if it returns early.
 *
 * @param ms Duration in milliseconds.
 */
void sleep_ms(long ms) {
    struct timespec ts = {
        .tv_sec = ms / 1000,               /* whole seconds */
        .tv_nsec = (ms % 1000) * 1000000L, /* remaining ms as nanoseconds */
    };
    nanosleep(&ts, NULL);
}
/**
 * Install handle_signal() for SIGALRM and arm ITIMER_REAL so that the
 * first signal arrives after 10 milliseconds and then every 10 milliseconds.
 *
 * The sigaction structure is zero-initialised apart from the handler, so
 * no flags (such as SA_RESTART) are requested. A setitimer() failure is
 * reported with perror() and terminates the process.
 */
void setup_timer(void) {
    struct sigaction sa = {0};
    struct itimerval timer;

    sa.sa_handler = handle_signal;
    sigaction(SIGALRM, &sa, NULL);

    /* First expiry after 10ms. */
    timer.it_value.tv_sec = 0;
    timer.it_value.tv_usec = 10000;
    /* Then repeat every 10ms. */
    timer.it_interval.tv_sec = 0;
    timer.it_interval.tv_usec = 10000;

    if (setitimer(ITIMER_REAL, &timer, NULL) >= 0) {
        return;
    }
    perror("setitimer");
    exit(EXIT_FAILURE);
}
/**
 * Reproducer driver.
 *
 * Arms the periodic SIGALRM timer, then runs 50 iterations. Each iteration
 * creates a fresh FIFO under /tmp and forks: the child waits 50ms and opens
 * the FIFO for reading, while the parent opens it for writing. Both sides
 * close their descriptor immediately; the parent then reaps the child,
 * removes the FIFO and prints how many signals were counted.
 *
 * @return EXIT_SUCCESS once all iterations have completed; the process
 *         exits with EXIT_FAILURE if mkfifo() or fork() fails.
 */
int main(void) {
    printf("EINTR test - Ctrl+C to stop\n");
    setup_timer();
    for (int i = 1; i <= 50; ++i) {
        char fifo[64];
        snprintf(fifo, sizeof(fifo), "/tmp/test_fifo_%d", i);
        unlink(fifo); /* remove a stale FIFO left by an earlier run */
        if (mkfifo(fifo, 0666) < 0) {
            perror("mkfifo");
            exit(EXIT_FAILURE);
        }
        pid_t pid = fork();
        if (pid < 0) {
            perror("fork");
            exit(EXIT_FAILURE);
        }
        if (pid == 0) {
            // Child opens FIFO for reading
            sleep_ms(50); // 50ms delay to let parent open writer
            int fd = safe_open(fifo, O_RDONLY);
            safe_close(fd);
            /*
             * Use _exit() rather than exit(): the child shares a copy of
             * the parent's stdio buffers, and exit() would flush them a
             * second time when stdout is not line-buffered.
             */
            _exit(EXIT_SUCCESS);
        }
        else {
            // Parent opens FIFO for writing
            int fd = safe_open(fifo, O_WRONLY);
            safe_close(fd);
            /*
             * SIGALRM is installed without SA_RESTART, so wait() can fail
             * with EINTR just like open() and close(); retry until the
             * child has actually been reaped (or a real error occurs).
             */
            while (wait(NULL) < 0 && errno == EINTR) {
                /* interrupted by the timer signal: try again */
            }
            unlink(fifo);
            /* sig_atomic_t is not guaranteed to be int; cast for %d. */
            printf("Loop %d OK (signals: %d)\n", i, (int)signal_count);
            signal_count = 0;
        }
        sleep_ms(1); // Small pause before next iteration
    }
    printf("Test complete.\n");
    return EXIT_SUCCESS;
}
```
Compile and run:
```
gcc -o reproducer reproducer.c
./reproducer
```
Expected result: the program should complete successfully with output like:
```
EINTR test - Ctrl+C to stop
.....Loop 1 OK (signals: 5)
.....Loop 2 OK (signals: 5)
.....Loop 3 OK (signals: 5)
.....Loop 4 OK (signals: 5)
.....Loop 5 OK (signals: 5)
.....Loop 6 OK (signals: 5)
.....Loop 7 OK (signals: 6)
.....Loop 8 OK (signals: 5)
.....Loop 9 OK (signals: 5)
.....Loop 10 OK (signals: 5)
.....Loop 11 OK (signals: 5)
.....Loop 12 OK (signals: 5)
.....Loop 13 OK (signals: 5)
.....Loop 14 OK (signals: 6)
.....Loop 15 OK (signals: 5)
.....Loop 16 OK (signals: 5)
.....Loop 17 OK (signals: 5)
.....Loop 18 OK (signals: 5)
.....Loop 19 OK (signals: 5)
.....Loop 20 OK (signals: 5)
.....Loop 21 OK (signals: 6)
.....Loop 22 OK (signals: 5)
.....Loop 23 OK (signals: 5)
.....Loop 24 OK (signals: 5)
.....Loop 25 OK (signals: 5)
.....Loop 26 OK (signals: 5)
.....Loop 27 OK (signals: 5)
.....Loop 28 OK (signals: 6)
.....Loop 29 OK (signals: 5)
.....Loop 30 OK (signals: 5)
.....Loop 31 OK (signals: 5)
.....Loop 32 OK (signals: 5)
.....Loop 33 OK (signals: 5)
.....Loop 34 OK (signals: 5)
.....Loop 35 OK (signals: 6)
.....Loop 36 OK (signals: 5)
.....Loop 37 OK (signals: 5)
.....Loop 38 OK (signals: 5)
.....Loop 39 OK (signals: 5)
.....Loop 40 OK (signals: 5)
.....Loop 41 OK (signals: 5)
.....Loop 42 OK (signals: 6)
.....Loop 43 OK (signals: 5)
.....Loop 44 OK (signals: 5)
.....Loop 45 OK (signals: 5)
.....Loop 46 OK (signals: 5)
.....Loop 47 OK (signals: 5)
.....Loop 48 OK (signals: 5)
.....Loop 49 OK (signals: 6)
.....Loop 50 OK (signals: 5)
Test complete.
```
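Actual result on NetBSD 10.0: the program hangs indefinitely in the first iteration, printing one `.` for every open() call that is interrupted with EINTR: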
```
EINTR test - Ctrl+C to stop
...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................^C
```
```
ktrace -f trace.out ./reproducer
# After hanging, press Ctrl+C and examine trace
kdump -f trace.out | tail -50
```
Output of kdump (every open() on the FIFO returns EINTR and is retried, until Ctrl+C):
```
17368 17368 reproducer CALL setcontext(0x7f7fff444000)
17368 17368 reproducer RET setcontext JUSTRETURN
17368 17368 reproducer CALL write(1,0x401173,1)
17368 17368 reproducer GIO fd 1 wrote 1 bytes
"."
17368 17368 reproducer RET write 1
17368 17368 reproducer CALL open(0x7f7fff4443d0,1,1)
17368 17368 reproducer NAMI "/tmp/test_fifo_1"
17368 17368 reproducer RET open -1 errno 4 Interrupted system call
17368 17368 reproducer PSIG SIGALRM caught handler=0x400dba mask=(): code=SI_TIMER sent by pid=0, uid=0 with sigval 0x0)
17368 17368 reproducer CALL setcontext(0x7f7fff444000)
17368 17368 reproducer RET setcontext JUSTRETURN
17368 17368 reproducer CALL write(1,0x401173,1)
17368 17368 reproducer GIO fd 1 wrote 1 bytes
"."
17368 17368 reproducer RET write 1
17368 17368 reproducer CALL open(0x7f7fff4443d0,1,1)
17368 17368 reproducer NAMI "/tmp/test_fifo_1"
17368 17368 reproducer RET open -1 errno 4 Interrupted system call
17368 17368 reproducer PSIG SIGALRM caught handler=0x400dba mask=(): code=SI_TIMER sent by pid=0, uid=0 with sigval 0x0)
17368 17368 reproducer CALL setcontext(0x7f7fff444000)
17368 17368 reproducer RET setcontext JUSTRETURN
17368 17368 reproducer CALL write(1,0x401173,1)
17368 17368 reproducer GIO fd 1 wrote 1 bytes
"."
17368 17368 reproducer RET write 1
17368 17368 reproducer CALL open(0x7f7fff4443d0,1,1)
17368 17368 reproducer NAMI "/tmp/test_fifo_1"
17368 17368 reproducer RET open -1 errno 4 Interrupted system call
17368 17368 reproducer PSIG SIGALRM caught handler=0x400dba mask=(): code=SI_TIMER sent by pid=0, uid=0 with sigval 0x0)
17368 17368 reproducer CALL setcontext(0x7f7fff444000)
17368 17368 reproducer RET setcontext JUSTRETURN
17368 17368 reproducer CALL write(1,0x401173,1)
17368 17368 reproducer GIO fd 1 wrote 1 bytes
"."
17368 17368 reproducer RET write 1
17368 17368 reproducer CALL open(0x7f7fff4443d0,1,1)
17368 17368 reproducer NAMI "/tmp/test_fifo_1"
17368 17368 reproducer RET open -1 errno 4 Interrupted system call
17368 17368 reproducer PSIG SIGALRM caught handler=0x400dba mask=(): code=SI_TIMER sent by pid=0, uid=0 with sigval 0x0)
17368 17368 reproducer CALL setcontext(0x7f7fff444000)
17368 17368 reproducer RET setcontext JUSTRETURN
17368 17368 reproducer CALL write(1,0x401173,1)
17368 17368 reproducer GIO fd 1 wrote 1 bytes
"."
17368 17368 reproducer RET write 1
17368 17368 reproducer CALL open(0x7f7fff4443d0,1,1)
17368 17368 reproducer NAMI "/tmp/test_fifo_1"
17368 17368 reproducer RET open RESTART
17368 17368 reproducer PSIG SIGINT SIG_DFL: code=SI_NOINFO
```
>Fix:
Home |
Main Index |
Thread Index |
Old Index