Subject: Processes hanging in disk access
To: None <current-users@NetBSD.org>
From: Thomas Klausner <wiz@NetBSD.org>
List: current-users
Date: 02/09/2006 00:12:30
Hi!

I updated my -current from Jan 7 to Feb 7 (3.99.15/i386).

Since the update, it has happened twice that, during some
disk activity (the first time two cvs updates plus two compilations;
the second time only two compilations),
the processes just stopped in disk access and became unkillable.

Currently ps output looks like this:
USER      PID %CPU %MEM   VSZ   RSS TTY    STAT STARTED     TIME COMMAND
wiz      6761  2.0  1.4  5500  7344 ttyp0- Sa   10:51PM 11:51.39 xmms
root        0  0.0  3.3     0 17160 ?      DKs   1:43PM  0:00.09 [swapper]
root        1  0.0  0.0    72    76 ?      Is    1:43PM  0:02.74 init
root        2  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [sysmon]
root        3  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [atabus0]
root        4  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [atabus1]
root        5  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [atabus2]
root        6  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [atabus3]
root        7  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [atabus4]
root        8  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [atabus5]
root        9  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [atabus6]
root       10  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [atabus7]
root       11  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [usb0]
root       12  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [usbtask]
root       13  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [usb1]
root       14  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [usb2]
root       15  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [usb3]
root       16  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [usb4]
root       17  0.0  3.3     0 17160 ?      DK    1:43PM  0:00.00 [atapibus0]
root       18  0.0  3.3     0 17160 ?      DK    1:43PM  0:01.59 [raid0]
root       19  0.0  3.3     0 17160 ?      DK    1:43PM  0:03.25 [raidio0]
root       20  0.0  3.3     0 17160 ?      DK    1:43PM  0:07.41 [pagedaemon]
root       21  0.0  3.3     0 17160 ?      DK    1:43PM  0:44.56 [ioflush]
root       22  0.0  3.3     0 17160 ?      DK    1:43PM  0:09.54 [aiodoned]
root       94  0.0  3.3     0 17160 ?      DK    1:43PM  0:19.33 [physiod]
root       96  0.0  0.0    56     4 ?      IL    1:54PM  0:00.00 nfsd: server
root       97  0.0  0.0    56     4 ?      IL    1:54PM  0:00.00 nfsd: server
root       98  0.0  0.0    56     4 ?      IL    1:54PM  0:00.00 nfsd: server
root      166  0.0  0.7  1116  3428 ?      Ss    1:54PM  0:01.21 /usr/sbin/ntpd
root      183  0.0  0.1   304   628 ?      Is    1:54PM  0:00.16 /usr/sbin/sshd
root      357  0.0  0.1   192   456 ?      Ss    1:54PM  0:00.15 /usr/libexec/postfix/master
postfix   373  0.0  0.1   232   516 ?      I     1:54PM  0:00.05 qmgr -l -t unix -u
root      476  0.0  0.1   188   408 ?      Is    1:54PM  0:00.08 /usr/sbin/syslogd -s
root      562  0.0  0.3  1928  1740 ?      Rs    3:31PM  0:08.85 SCREEN -d -RR (screen-4.0.2)
root      665  0.0  0.0   164   244 ?      Ss    1:54PM  0:00.06 /usr/sbin/rpcbind -l
root     1051  0.0  0.1   272   272 ?      Ss    1:54PM  0:00.07 /usr/sbin/mountd
root     1112  0.0  0.0    56     4 ?      IL    1:54PM  0:00.00 nfsd: server
root     1178  0.0  0.0   124     4 ?      Is    1:54PM  0:00.01 nfsd: master
root     1373  0.0  0.1   244   392 ?      Ss    1:54PM  0:00.08 /usr/sbin/cron
root     1403  0.0  0.0    68     4 ?      IWs   1:54PM  0:00.00 /usr/sbin/inetd -l
postfix  5843  0.0  0.2   192   980 ?      S    11:55PM  0:00.01 pickup -l -t unix -u
root     9116  0.0  5.1 13584 26624 ?      S     6:33PM  1:49.11 /usr/X11R6/bin/X :0
wiz     11775  0.0  0.1   112   676 ttyp0  R+   12:07AM  0:00.00 ps -auxwww
wiz     19686  0.0  0.0   812     4 ttyp0  R+   12:07AM  0:00.00 /bin/bash
wiz     28465  0.0  0.3   812  1528 ttyp0  Ss   12:05AM  0:00.01 /bin/bash
wiz        30  0.0  0.0   376   228 ttyp1  Ss+   6:33PM  0:00.05 screen -d -RR -e^Zz (screen-4.0.2)
wiz       630  0.0  0.0   808     4 ttyp2  IWs   3:46PM  0:00.01 /bin/bash
root      679  0.0  0.1   244   704 ttyp2  I+    3:46PM  0:00.04 ksh
root     1568  0.0  0.0  3728     4 ttyp2  DW   11:50PM  0:00.02 /usr/bin/../libexec/cc1 -quiet -I. -I../.. -I../../src -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/libgnomeui-2.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/libgnome-2.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/libgnomecanvas-2.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/gtk-2.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/libart-2.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/gconf/2 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/libbonoboui-2.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/gnome-vfs-2.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/lib/gnome-vfs-2.0/include -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/gnome-keyring-1 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/glib/glib-2.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/lib/glib-2.0/include -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/orbit-2.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/libbonobo-2.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/bonobo-activation-2.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/pango-1.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/freetype2 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/lib/gtk-2.0/include -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/atk-1.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/cairo -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/libxml2 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/libgnomeprintui-2.2 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/libgnomeprint-2.2 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/libglade-2.0 -I/usr/obj/www/gtkhtml38/work.i386/.buildlink/include/libsoup-2.2 -I/usr/include/krb5 -I/usr/obj/www/gtkhtml38/work.i386/.x11-buildlink/include -iprefix /usr/bin/../libexec/ -MD image.d -MF .deps/image.Tpo -MP -MT image.lo -MQ image.o -D__GNUC__=3 -D__GNUC_MINOR__=3 -D__GNUC_PATCHLEVEL__=3 -D_REENTRANT -D_PTHREADS -DHAVE_CONFIG_H -DORBIT2=1 -DGNOME_EXPLICIT_TRANSLATION_DOMAIN="gtkhtml-3.8" -DBONOBO_EXPLICIT_TRANSLATION_DOMAIN="gtkhtml-3.8" -DG_LOG_DOMAIN="gtkhtml" -DSRCDIR="." -DPREFIX="/usr/pkg" -DGLX_GLXEXT_LEGACY -DICONDIR="/usr/pkg/share/gtkhtml-3.8/icons" -DGTKHTML_DATADIR="/usr/pkg/share/gtkhtml-3.8" -DGNOMELOCALEDIR="/usr/pkg/share/locale" -DGLADE_DATADIR="/usr/pkg/share/gtkhtml-3.8" -DGDK_DISABLE_DEPRECATED=1 -DG_DISABLE_DEPRECATED=1 -DPREFIX="/usr/pkg" -DSYSCONFDIR="/usr/pkg/etc" -DDATADIR="/usr/pkg/share" -DLIBDIR="/usr/pkg/share" -DBONOBO_DISABLE_DEPRECATED=1 image.c -quiet -dumpbase image.c -auxbase-strip image o -O2 -Wall -Wmissing-prototypes -o
root     2472  0.0  0.0    72   164 ttyp2  D    11:52PM  0:00.02 umount /archive/misc/sandbox//usr/include
root     2619  0.0  0.0    24   112 ttyp2  D    11:53PM  0:00.01 umount /archive/misc/sandbox//usr/include
root    16681  0.0  0.0   172   184 ttyp2  T    11:52PM  0:00.00 sh sandbox umount
wiz     16370  0.0  0.0   804     4 ttyp4  IWs   6:33PM  0:00.01 bash
root    14807  0.0  0.0  3684     4 ttyp6- DW   11:50PM  0:00.01 /usr/bin/../libexec/cc1 -quiet -I/disk/wd1a/archive/cvs/src/tools/compat/obj.i386 -I/disk/wd1a/archive/cvs/src/tools/compat/obj.i386/include -I/disk/wd1a/archive/cvs/src/tools/binstall/../compat -I/disk/wd1a/archive/cvs/src/bin/ls -I/disk/wd1a/archive/cvs/src/usr.sbin/mtree -iprefix /usr/bin/../libexec/ -D__GNUC__=3 -D__GNUC_MINOR__=3 -D__GNUC_PATCHLEVEL__=3 -DHAVE_NBTOOL_CONFIG_H=1 -D_FILE_OFFSET_BITS=64 /disk/wd1a/archive/cvs/src/usr.sbin/mtree/getid.c -quiet -dumpbase getid.c -auxbase-strip getid.lo o -O -o
root      427  0.0  0.0   172     4 ttyE0  IWs   1:54PM  0:00.02 login
wiz      1337  0.0  0.2  2624  1152 ttyE0  S     6:33PM  0:02.71 wmpload -device wi0 -div 12288
wiz      8262  0.0  0.2   448   940 ttyE0  S     6:33PM  0:05.85 rxvt -tn rxvt -geometry 80x48+0+0 -e screen -d -RR -e^Zz
wiz     16836  0.0  0.0    68     4 ttyE0  IW+   6:33PM  0:00.00 xinit /home/wiz/.xinitrc -- /usr/X11R6/bin/X -bestRefresh -dpi 100 -depth 16
wiz     21501  0.0  0.4  1788  2284 ttyE0  S     6:33PM  0:05.18 openbox
wiz     24008  0.0  0.1   676   304 ttyE0  S     6:33PM  0:06.38 wmCalClock -tc #307030 -bc black -24
wiz     24490  0.0  0.0   776     4 ttyE0  IW    6:33PM  0:00.01 -bash
wiz     27615  0.0  0.1   748   640 ttyE0  S     6:33PM  0:02.64 wmpload -device vr0 -div 1048576
wiz     28354  0.0  0.2   536  1076 ttyE0  S     6:33PM  0:03.98 rxvt -tn rxvt -geometry 130x48-0+0
wiz     28670  0.0  0.0   156     4 ttyE0  IW+   6:33PM  0:00.01 /bin/sh /usr/X11R6/bin/startx -- -bestRefresh -dpi 100 -depth 16
root      420  0.0  0.0    56     4 ttyE1  IWs+  1:54PM  0:00.00 /usr/libexec/getty Pc ttyE1
root      429  0.0  0.0    56     4 ttyE2  IWs+  1:54PM  0:00.00 /usr/libexec/getty Pc ttyE2
root      432  0.0  0.0    56     4 ttyE3  IWs+  1:54PM  0:00.00 /usr/libexec/getty Pc ttyE3

Please note the hung cc and umount processes. They don't react to
kill -9. All file systems are local. dmesg doesn't contain any
interrupt errors or similar.

xmms is still playing fine, and I can start new processes;
I guess some would hang, but I haven't found any yet.

Any ideas?
I'll try to get a kernel dump, in case that'd help.
 Thomas