Subject: kern/15339: "panic: pool_get: mbpl: page empty" when memory scarce
To: None <gnats-bugs@gnats.netbsd.org>
From: None <dbj@netbsd.org>
List: netbsd-bugs
Date: 01/22/2002 21:37:25
>Number:         15339
>Category:       kern
>Synopsis:       "panic: pool_get: mbpl: page empty" when memory scarce
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    kern-bug-people
>State:          open
>Class:          sw-bug
>Submitter-Id:   net
>Arrival-Date:   Tue Jan 22 19:15:01 PST 2002
>Closed-Date:
>Last-Modified:
>Originator:     Darrin B. Jewell
>Release:        NetBSD 1.5ZA 20020116T1504Z
>Organization:
>Environment:
System: NetBSD quiteria 1.5ZA NetBSD 1.5ZA (QUITERIA) #1: Thu Jan 17 23:58:17 EST 2002 dbj@quiteria:/usr/src/sys/arch/macppc/compile/QUITERIA macppc
Architecture: powerpc
Machine: macppc
>Description:

        After experiencing some problems with my system under heavy
memory usage, I wrote a test program to allocate memory with calloc()
until calloc failed.  This program can cause my system to panic
with the error: "panic: pool_get: mbpl: page empty"
This program seems to cause the problem to occur within the
first time or two that I run it.

I believe the ethernet driver is calling either MGET or MCLGET with
the M_DONTWAIT flag when this happens.  I believe this translates to
calling pool_cache_get() with a 0 flags value.  Instead of returning a
NULL mbuf pointer, the system panics.

>How-To-Repeat:

I compiled the following program, testmem.c:

/****************************************************************/

#include <stdlib.h>
#include <stdio.h>
#include <assert.h>

#include <unistd.h>

#include <sys/resource.h>

/*
 * Print one resource limit to f as a single line:
 *
 *	<name, left-justified in 16 columns>= <soft limit> <hard limit>
 *
 * f        - stream the line is written to
 * resource - RLIMIT_* constant handed to getrlimit()
 * s        - label printed in the first column (and in the error message)
 *
 * Each limit is shown in hexadecimal, or as the literal "RLIM_INFINITY"
 * when it equals RLIM_INFINITY.  If getrlimit() fails, the error is
 * reported with perror() and the assert() below then stops the program.
 */
void
dump_rlimit(FILE *f,int resource,const char *s)
{
	/* Both columns default to the "unlimited" spelling. */
	char buf1[512] = "RLIM_INFINITY";
	char buf2[512] = "RLIM_INFINITY";
	struct rlimit rl;
	int r;

	rl.rlim_cur = 0;
	rl.rlim_max = 0;
	r = getrlimit(resource,&rl);
	if (r == -1) {
		char buf[512];
		/* The "%s" conversion needs the label as its argument. */
		snprintf(buf,sizeof(buf),"getrlimit(%s)",s);
		perror(buf);
	}
	assert(r == 0);

	/*
	 * The width of rlim_t differs between platforms, so convert
	 * explicitly to the type that "%llx" expects.
	 */
	if (rl.rlim_cur != RLIM_INFINITY) {
		snprintf(buf1,sizeof(buf1),"0x%llx",
		    (unsigned long long)rl.rlim_cur);
	}
	if (rl.rlim_max != RLIM_INFINITY) {
		snprintf(buf2,sizeof(buf2),"0x%llx",
		    (unsigned long long)rl.rlim_max);
	}
	fprintf(f,"%-16s= %18s %18s\n",s,buf1,buf2);
}

/* Dump one limit, using the spelling of the RLIMIT_* constant as its label. */
#define DUMP_RLIM(f,x) dump_rlimit((f),(x),(#x))

/*
 * Write one line per resource limit to f, in a fixed order, by calling
 * dump_rlimit() for each entry of the table below.
 */
void
dump_rlimits(FILE *f)
{
/* Pairs a resource constant with its own name, exactly as DUMP_RLIM does. */
#define RLIM_ENTRY(x) { (x), (#x) }
	static const struct {
		int resource;
		const char *label;
	} limits[] = {
		RLIM_ENTRY(RLIMIT_CORE),
		RLIM_ENTRY(RLIMIT_CPU),
		RLIM_ENTRY(RLIMIT_DATA),
		RLIM_ENTRY(RLIMIT_FSIZE),
		RLIM_ENTRY(RLIMIT_MEMLOCK),
		RLIM_ENTRY(RLIMIT_NOFILE),
		RLIM_ENTRY(RLIMIT_NPROC),
		RLIM_ENTRY(RLIMIT_RSS),
		RLIM_ENTRY(RLIMIT_STACK)
	};
#undef RLIM_ENTRY
	unsigned int i;

	for (i = 0; i < sizeof(limits) / sizeof(limits[0]); i++)
		dump_rlimit(f, limits[i].resource, limits[i].label);
}

/*
 * Print the process resource limits, then allocate memory in 100MB steps
 * with calloc() until an allocation fails.  After the limits are printed,
 * after every successful step, and after the final failed step,
 * "ps -v -p <pid>" is run so the growth of the process can be observed.
 *
 * The allocated blocks are deliberately never freed: exhausting memory is
 * the purpose of the program.
 *
 * Returns EXIT_SUCCESS once calloc() has failed.
 */
int
main(int argc, char *argv[])
{
	size_t incr = 100*1024*1024;
	size_t tot;
	char psbuf[512];
	/*
	 * volatile so the compiler cannot discard an allocation whose
	 * result is otherwise only compared against NULL.
	 */
	void *volatile chunk;

	(void)argc;
	(void)argv;

	dump_rlimits(stdout);
	/* pid_t is not necessarily int; convert to match "%d". */
	snprintf(psbuf,sizeof(psbuf),"ps -v -p %d",(int)getpid());
	tot = 0;
	/*
	 * Flush before each system() so our own output appears ahead of
	 * the child's even when stdout is not line buffered.
	 */
	fflush(stdout);
	system(psbuf);
	while ((chunk = calloc(incr,1)) != NULL) {
		tot += incr;
		/* size_t is not necessarily unsigned long; convert for "%lx". */
		fprintf(stdout,"succeed malloc(%#10lx),  total = %#18lx\n",
		    (unsigned long)incr,(unsigned long)tot);
		fflush(stdout);
		system(psbuf);
	}
	fprintf(stdout,"failed  malloc(%#10lx),  total = %#18lx\n",
	    (unsigned long)incr,(unsigned long)(tot+incr));
	fflush(stdout);
	system(psbuf);

	return EXIT_SUCCESS;
}

/****************************************************************/

$ cc -o testmem testmem.c
$ ulimit -S -c 0
$ ulimit -S -d `expr 512 \* 1024`
$ ulimit -S -a
time(seconds)        unlimited
file(blocks)         unlimited
data(kbytes)         524288
stack(kbytes)        2048
coredump(blocks)     0
memory(kbytes)       351696
locked memory(kbytes) 117232
process(processes)   160
nofiles(descriptors) 64
$ ./testmem
RLIMIT_CORE     =                0x0      RLIM_INFINITY
RLIMIT_CPU      =      RLIM_INFINITY      RLIM_INFINITY
RLIMIT_DATA     =         0x20000000         0x40000000
RLIMIT_FSIZE    =      RLIM_INFINITY      RLIM_INFINITY
RLIMIT_MEMLOCK  =          0x727c000         0x15774000
RLIMIT_NOFILE   =               0x40              0x6ec
RLIMIT_NPROC    =               0xa0              0x214
RLIMIT_RSS      =         0x15774000         0x15774000
RLIMIT_STACK    =           0x200000          0x2000000
PID STAT    TIME SL RE PAGEIN VSZ RSS LIM TSIZ %CPU %MEM COMMAND
335 S+   0:00.01  0  0      0  92 544   -    8  0.0  0.1 ./testmem 
succeed malloc( 0x6400000),  total =          0x6400000
PID STAT   TIME SL RE PAGEIN    VSZ    RSS LIM TSIZ %CPU %MEM COMMAND
335 S+   0:01.72  0  2      0 102492 103044   -    8 61.0 26.2 ./testmem 
succeed malloc( 0x6400000),  total =          0xc800000
PID STAT   TIME SL RE PAGEIN    VSZ    RSS LIM TSIZ %CPU %MEM COMMAND
335 S+   0:03.43  0  4      0 204892 205544   -    8 78.6 52.3 ./testmem 
succeed malloc( 0x6400000),  total =         0x12c00000
PID STAT   TIME SL RE PAGEIN    VSZ    RSS LIM TSIZ %CPU %MEM COMMAND
335 S+   0:05.23  0  9      9 307292 270216   -    8 55.3 68.7 ./testmem 


The following ddb session was typed into this pr by hand:

panic: pool_get: mbpl: page empty
db> t/u
at panic+174
at pool_get+268
at pool_cache_get+64
at gmac_get+60
at gmac_rint+5c
at gmac_intr+4c
at ext_intr_openpic+d4
at extint_call+0
at 0xed72bc
at pmap_enter+218
at uvm_fault+12e8
at trap+330

db> show pool/l mbpool
POOL mbpl: size 256, align 8, ioff 0, roflags 0x00000000
     pagesz 4096, mtype 0
     alloc 0x1a2d7c, release 0x1a2dac
     minitems 16, minpages 1, maxpages 4294967295, npages 2
     itemsperpage 16, nitems 30, nout 2, harlimit 4294967295

     nget 85, nfail 0, nput 83
     npagealloc 5, npagefree 3, hiwat 5, nidle 1

     no log
db>

db> show pool/p mbpool
POOL mbpl: size 256, align 8, ioff 0, roflags 0x00000000
     pagesz 4096, mtype 0
     alloc 0x1a2d7c, release 0x1a2dac
     minitems 16, minpages 1, maxpages 4294967295, npages 2
     itemsperpage 16, nitems 30, nout 2, harlimit 4294967295

     nget 85, nfail 0, nput 83
     npagealloc 5, npagefree 3, hiwat 5, nidle 1

     page list:
          page 0x115f9000, nmissing 2, time 0,0
          page 0x15eec000, nmissing 0, time 1011751730,760000
     curpage 0x115f9000
db>

db> show pool/p mbpool
POOL mbpl: size 256, align 8, ioff 0, roflags 0x00000000
     pagesz 4096, mtype 0
     alloc 0x1a2d7c, release 0x1a2dac
     minitems 16, minpages 1, maxpages 4294967295, npages 2
     itemsperpage 16, nitems 30, nout 2, harlimit 4294967295

     nget 85, nfail 0, nput 83
     npagealloc 5, npagefree 3, hiwat 5, nidle 1
     cache 0x4ac19c: allocfrom 0x0 freeto 0x116ec1e0
         hits 436357 misses 86 ngroups 1 nitems 1
             group 0x116ec1e0: avail 1
                     0x115f9c00
                     0x0
                     0x0
                     0x0
                     0x0
                     0x0
                     0x0
                     0x0
                     0x0
                     0x0
                     0x0
                     0x0
                     0x0
                     0x0
                     0x0
                     0x0
db>

Some more useful info:

$ sysctl kern.mbuf
kern.mbuf.msize = 256
kern.mbuf.mclbytes = 2048
kern.mbuf.nmbclusters = 1024
kern.mbuf.mblowat = 16
kern.mbuf.mcllowat = 8

$ dmesg
NetBSD 1.5ZA (QUITERIA) #1: Thu Jan 17 23:58:17 EST 2002
    dbj@quiteria:/usr/src/sys/arch/macppc/compile/QUITERIA
total memory = 384 MB
avail memory = 343 MB
using 2048 buffers containing 19760 KB of memory
mainbus0 (root)
cpu0 at mainbus0: 750 (Revision 2215), ID 0 (primary)
cpu0: HID0 8090c0a4<EMCP,DOZE,DPM,ICE,DCE,SGE,BTIC,BHT>
cpu0: 256KB backside cache
uninorth0 at mainbus0
pci0 at uninorth0 bus 0
pci0: i/o space, memory space enabled
pchb0 at pci0 dev 11 function 0
pchb0: Apple Computer Pangea AGP Interface (rev. 0x00)
ofb0 at pci0 dev 16 function 0: ATI Technologies Mobility M3 (AGP)
ofb0: 1024 x 768, 8bpp
wsdisplay0 at ofb0 kbdmux 1: console (std, vt100 emulation)
uninorth1 at mainbus0
pci1 at uninorth1 bus 0
pci1: i/o space, memory space enabled
pchb1 at pci1 dev 11 function 0
pchb1: Apple Computer Pangea Host-PCI Bridge (rev. 0x00)
obio0 at pci1 dev 23 function 0: addr 0x80000000
interrupt-controller at obio0 offset 0x40000 not configured
gpio at obio0 offset 0x50 not configured
escc-legacy at obio0 offset 0x12000 not configured
zsc0 at obio0 offset 0x13000: irq 22,23
zstty0 at zsc0 channel 0
zstty1 at zsc0 channel 1
i2s at obio0 offset 0x10000 not configured
timer at obio0 offset 0x15000 not configured
adb0 at obio0 offset 0x16000 irq 47: 3 targets
aed0 at adb0 addr 0: ADB Event device
akbd0 at adb0 addr 2: PowerBook G3 keyboard
wskbd0 at akbd0: console keyboard, using wsdisplay0
ams0 at adb0 addr 3: EMP trackpad <tpad> 2-button, 400 dpi
wsmouse0 at ams0 mux 0
abtn0 at adb0 addr 7: brightness/volume button
battery at obio0 offset 0x0 not configured
backlight at obio0 offset 0xf300 not configured
i2c at obio0 offset 0x18000 not configured
wdc0 at obio0 offset 0x1f000 irq 19: DMA transfer
atapibus0 at wdc0 channel 0: 2 targets
cd0 at atapibus0 drive 1: <TOSHIBA DVD-ROM SD-R2002, , 1B27> type 5 cdrom removable
cd0: drive supports PIO mode 4, DMA mode 2
wd0 at wdc0 channel 0 drive 0: <IC25N030ATDA04-0>
wd0: drive supports 16-sector PIO transfers, LBA addressing
wd0: 28615 MB, 16383 cyl, 16 head, 63 sec, 512 bytes/sect x 58605120 sectors
wd0: drive supports PIO mode 4, DMA mode 2, Ultra-DMA mode 5 (Ultra/100)
ata4 conf[0] = 0xc519465, cyc = 2 (30 ns), act = 6 (90 ns), inact = 3
ata4 conf[1] = 0x19465, cyc = 8 (120 ns), act = 5 (75 ns), inact = 3
wd0(wdc0:0:0): using PIO mode 4, DMA mode 2, Ultra-DMA mode 4 (Ultra/66) (using DMA data transfers)
cd0(wdc0:0:1): using PIO mode 4, DMA mode 2 (using DMA data transfers)
wi0 at obio0 offset 0x30000 irq 57: 802.11 address 00:30:65:09:c2:e6
wi0: using Lucent chip or unknown chip
ohci0 at pci1 dev 24 function 0: Apple Computer Pangea USB Controller (rev. 0x00)
ohci0: interrupting at irq 27
ohci0: OHCI version 1.0
usb0 at ohci0: USB revision 1.0
uhub0 at usb0
uhub0: Apple Computer OHCI root hub, class 9/0, rev 1.00/1.00, addr 1
uhub0: 2 ports with 2 removable, self powered
ohci1 at pci1 dev 25 function 0: Apple Computer Pangea USB Controller (rev. 0x00)
ohci1: interrupting at irq 28
ohci1: OHCI version 1.0
usb1 at ohci1: USB revision 1.0
uhub1 at usb1
uhub1: Apple Computer OHCI root hub, class 9/0, rev 1.00/1.00, addr 1
uhub1: 2 ports with 2 removable, self powered
uninorth2 at mainbus0
pci2 at uninorth2 bus 0
pci2: i/o space, memory space enabled
pchb2 at pci2 dev 11 function 0
pchb2: Apple Computer Pangea Host-PCI Bridge (rev. 0x00)
fwohci0 at pci2 dev 14 function 0: Apple Computer Pangea Firewire (rev. 0x00)
fwohci0: interrupting at irq 40
fwohci0: OHCI 1.0, 00:03:93:ff:fe:3d:98:1e, 400Mb/s, 2048 max_rec, 8 iso_ctx
gm0 at pci2 dev 15 function 0: Ethernet address 00:03:93:3d:98:1e
gm0: interrupting at irq 41
bmtphy0 at gm0 phy 0: BCM5221 10/100 media interface, rev. 3
bmtphy0: 10baseT, 10baseT-FDX, 100baseTX, 100baseTX-FDX, auto
fw0 at fwohci0: 00:03:93:ff:fe:3d:98:1e:0a:02:20:00:00:00:00:00
boot device: wd0
root on wd0a dumps on wd0b
root file system type: ffs
panic: pool_get: mbpl: page empty
Power sources: AC battery
Battery 0: Q: 100% (3351 mAh, max charge: 3351 mAh) I: 0 mA  V: 12.47 V
syncing disks... done
panic: wdc_exec_command: polled command not done

>Fix:
	
>Release-Note:
>Audit-Trail:
>Unformatted: