Current-Users archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: named spinning



In Message <20111006013512.561039711D%rebar.astron.com@localhost>,
   christos%zoulas.com@localhost (Christos Zoulas) wrote:

=>On Oct 5,  9:07pm, gary%duzan.org@localhost (Gary Duzan) wrote:
=>-- Subject: Re: named spinning
=>
=>| In Message <j6is6p$osp$1%dough.gmane.org@localhost>,
=>|    christos%astron.com@localhost (Christos Zoulas) wrote:
=>| 
=>| =>In article <20111005225443.25B23115CF5%xen1.duzan.org@localhost>,
=>| =>Gary Duzan  <gary%duzan.org@localhost> wrote:
=>| =>>
=>| =>>(gdb) where
=>| =>>#0  0x00007f7ff5838cda in _sys___kevent50 () from /usr/lib/libc.so.12
=>| =>>#1  0x00007f7ff6005eb1 in __kevent50 () from /usr/lib/libpthread.so.1
=>| =>>#2  0x00007f7ff641d189 in watcher (uap=0x7f7ff7b2a000) at
=>| =>>/usr/src/external/bsd/bind/dist/lib/isc/unix/socket.c:5694
=>| =>>#3  0x00007f7ff60099d5 in pthread_setcancelstate () from
=>| =>>/usr/lib/libpthread.so.1
=>| =>>#4  0x00007f7ff5876350 in ___lwp_park50 () from /usr/lib/libc.so.12
=>| =>>#5  0x00007f7ff4000000 in ?? ()
=>| =>>#6  0x00007f7ff7fff2c0 in ?? ()
=>| =>>#7  0x0000000111110001 in ?? ()
=>| =>>#8  0x0000000033330003 in ?? ()
=>| =>>#9  0x0000000000000000 in ?? ()
=>| =>
=>| =>This is the event loop. Can you print cc and *manager?
=>| 
=>|    Had to rebuild without -O2 to get cc.
=>| 
=>| (gdb) frame 2
=>| #2  0x00007f7ff6423b99 in watcher (uap=0x7f7ff7b2a000) at /usr/src/external/bsd/bind/dist/lib/isc/unix/socket.c:800
=>| 800             REQUIRE(counterid != -1);
=>| (gdb) print cc
=>| $1 = 1
=>| (gdb) print *manager
=>| $2 = {common = {impmagic = 1229942119, magic = 1098083687, methods = 0x7f7ff66621d0},
=>|     mctx = 0x7f7ff7b05120, lock = {ptm_magic = 858980355, ptm_errorcheck = 0 '\0',
=>|     ptm_pad1 = "\000\000", ptm_interlock = 0 '\0', ptm_pad2 = "\000\000", ptm_owner = 0x0,
=>|     ptm_waiters = 0x0, ptm_recursed = 0, ptm_spare2 = 0x0}, fdlock = 0x7f7ff7b72000,
=>|     stats = 0x7f7ff7b3d000, kqueue_fd = 8, nevents = 64, events = 0x7f7ff7b7e000,
=>|     maxsocks = 4096, pipe_fds = {5, 7}, fds = 0x7f7ff7b66000, fdstate = 0x7f7ff7b6e000,
=>|     socklist = {head = 0x7f7ff7b53000, tail = 0x7f7ff2a6c6c0}, reserved = 512,
=>|     watcher = 0x7f7ff3c00000, shutdown_ok = {ptc_magic = 1431633925, ptc_lock = 0 '\0',
=>|     ptc_waiters = {ptqh_first = 0x0, ptqh_last = 0x7f7ff7b2a0b0}, ptc_mutex = 0x0,
=>|     ptc_private = 0x0}, maxudp = 0}
=>| (gdb) 
=>
=>print manager->events[0]?
=>then find out which fd it is and run fuser to see what it points to.

   Ok, I added some debugging output, like so:

Index: socket.c
===================================================================
RCS file: /usr2/netbsd-cvs/src/external/bsd/bind/dist/lib/isc/unix/socket.c,v
retrieving revision 1.8
diff -u -r1.8 socket.c
--- socket.c    11 Sep 2011 18:55:42 -0000      1.8
+++ socket.c    7 Oct 2011 00:40:01 -0000
@@ -809,6 +809,7 @@
 
 #ifdef USE_KQUEUE
        struct kevent evchange;
+       int i,cc;
 
        memset(&evchange, 0, sizeof(evchange));
        if (msg == SELECT_POKE_READ)
@@ -817,8 +818,13 @@
                evchange.filter = EVFILT_WRITE;
        evchange.flags = EV_ADD;
        evchange.ident = fd;
-       if (kevent(manager->kqueue_fd, &evchange, 1, NULL, 0, NULL) != 0)
+       if ((cc=kevent(manager->kqueue_fd, &evchange, 1, NULL, 0, NULL)) != 0)
+       {
+               for (i=0; i<cc;i++)
+                       fprintf(stderr, "unexpected: %d of %d, ident: %u\n", i, 
cc, (unsigned int)manager->events[i].ident);
                result = isc__errno2result(errno);
+               printf("\terrno: %d\n", result);
+       }
 
        return (result);
 #elif defined(USE_EPOLL)
@@ -3788,6 +3794,7 @@
        isc_boolean_t done;
        int ctlfd;
        int cc;
+       int i;
 #ifdef USE_KQUEUE
        const char *fnname = "kevent()";
 #elif defined (USE_EPOLL)
@@ -3812,8 +3819,12 @@
        while (!done) {
                do {
 #ifdef USE_KQUEUE
+                       fprintf(stderr, "<");
                        cc = kevent(manager->kqueue_fd, NULL, 0,
                                    manager->events, manager->nevents, NULL);
+                       fprintf(stderr, ">");
+                       for (i=0; i<cc;i++)
+                               fprintf(stderr, "got: %d of %d, ident: %u\n", 
i, cc, (unsigned int)manager->events[i].ident);
 #elif defined(USE_EPOLL)
                        cc = epoll_wait(manager->epoll_fd, manager->events,
                                        manager->nevents, -1);
===================================================================

and got:

===================================================================
wheel { /usr/src/external/bsd/bind/lib/libisc } # 
/usr2/obj/external/bsd/bind/bin/named/named -f
<>got: 0 of 1, ident: 5
<>got: 0 of 1, ident: 5
<>got: 0 of 1, ident: 5
<
===================================================================

So it is just staying in kevent(), eating CPU.

14090 root      34    0    86M   33M CPU/1      4:31 83.79% 83.79% named

Here is the fstat output:

===================================================================
wheel { /usr/src/external/bsd/bind/dist/lib/isc/unix } # fstat -p `pgrep named`
USER     CMD          PID   FD MOUNT       INUM MODE         SZ|DV R/W
root     named      14090   wd /        13992411 drwxr-xr-x     512 r 
root     named      14090    0 /dev/pts       3 crw--w----   ttyp0 rw
root     named      14090    1 /dev/pts       3 crw--w----   ttyp0 rw
root     named      14090    2 /dev/pts       3 crw--w----   ttyp0 rw
root     named      14090    3* unix dgram  <-> ffff80000e244980
root     named      14090    4 /        13647756 crw-rw-rw-   3658,321633 rw
root     named      14090    5* pipe 0xffff8000b8f0b7e0 <- 0xffff8000948e12b8 rn
root     named      14090    6* pipe 0xffff8000948e12b8 -> 0xffff8000b8f0b7e0 w
root     named      14090    4* kqueue pending 0
root     named      14090    5* crypto 0xffff80000dc36f18
root     named      14090    9 /        13648392 cr--r--r--   1438,320325 r 
[ and the port 53 sockets ]
===================================================================

   Any more ideas on what to look for? I think some of the other things
I was seeing were artifacts of running under the debugger, which the
printfs avoid. I could try building a debugging libc on another
box with the same current snapshot, but it wouldn't be my first
choice.

                                        Gary Duzan




Home | Main Index | Thread Index | Old Index