tech-net archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

Re: mbuf (cluster) leak?



I'm still experiencing those mbuf/mbuf cluster leaks.

> I'm still wondering whether there's a way of inspecting the contents 
> of those mbuf clusters. I strongly suspect that looking at the contents 
> will give a strong hint at what's going on.
In the meantime, I wrote a (rather gross) program to dump the contents of a kmem pool.

I also sprinkled magic assignments to m_pkthdr.pad0 to get an idea where an mbuf was last touched.

Results:

-- Half of the dead mbufs have last been seen at dev/pci/if_wm.c:wm_send_common_locked() just after the call to bpf_mtap(). They ought to be m_freem()'d ten lines down (but aren't because of ext_refcnt, see below).

-- The other half has last been seen at net/if_vlan.c:vlan_transmit() in the else branch of the if (error).

-- Both groups have type = 1 (MT_DATA) and flags = 3 (M_EXT | M_PKTHDR).

-- All have ext_refcnt = 1.

-- The corresponding mbuf clusters contain ethernet + IPv6 headers.


So it appears to me there's some missing refcnt decrement.

This is all on -8, but I can't see any relevant changes (save the uipc_mbuf2 -> uipc_mbuf merge) in the code involved.

Does this ring a bell to someone?
#define _KMEMUSER
#define __POOL_EXPOSE

#include <err.h>
#include <unistd.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <nlist.h>
#include <sys/queue.h>
#include <machine/types.h>
#include <sys/pool.h>
#include <kvm.h>

/* Kernel image (-N) and memory file (-M); both are passed as-is to kvm_openfiles(). */
char *nlistf = NULL, *memf = NULL;
/* kvm handle opened in main() and used by kread(). */
kvm_t *kd = NULL;
/* Receives the error text of a failed kvm_openfiles(). */
static char kvm_errbuf[_POSIX2_LINE_MAX];
/* Symbols resolved with kvm_nlist(); entry 0 is the head of the pool list. */
struct nlist namelist[] = {
	{ .n_name = "_pool_head" },
	{ .n_name = NULL }
};
/* Destination of the dump; replaced by the file given with -o. */
FILE *out = stdout;
/* Output format: nothing (-n), raw item bytes (-r, default) or hex dump (-x). */
enum { out_null, out_raw, out_hex } outform = out_raw;
/* Size in bytes of one pool item; set from pool.pr_size in main(). */
unsigned int itemsize;
/* Scratch buffer of itemsize bytes holding the item currently being dumped. */
unsigned char *buf = NULL;
/* Non-zero (-v) enables progress messages on stderr. */
int verbose = 0;

/* from kern/subr_pool.c */
/* One word of the per-page free bitmap used by PR_NOTOUCH pools. */
typedef uint32_t pool_item_bitmap_t;
/* Number of item bits held by one bitmap word. */
#define BITMAP_SIZE	(CHAR_BIT * sizeof(pool_item_bitmap_t))
/* Mask that reduces an item index to its bit position within a word. */
#define BITMAP_MASK	(BITMAP_SIZE - 1)

/*
 * Userland copy of the kernel's per-page pool header. Instances are filled
 * by reading sizeof(struct pool_item_header) bytes from kernel memory, so
 * the member layout must stay identical to the kernel's definition.
 * All pointers stored in it are kernel addresses.
 */
struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
			ph_pagelist;	/* pool page list */
	SPLAY_ENTRY(pool_item_header)
			ph_node;	/* Off-page page headers */
	void *		ph_page;	/* this page's address */
	uint32_t	ph_time;	/* last referenced */
	uint16_t	ph_nmissing;	/* # of chunks in use */
	uint16_t	ph_off;		/* start offset in page */
	union {
		/* !PR_NOTOUCH */
		struct {
			LIST_HEAD(, pool_item)
			phu_itemlist;	/* chunk list for this page */
		} phu_normal;
		/* PR_NOTOUCH */
		struct {
			/*
			 * Only the first bitmap word is part of this struct;
			 * dump_page() reads further words directly from
			 * kernel memory.
			 */
			pool_item_bitmap_t phu_bitmap[1];
		} phu_notouch;
	} ph_u;
};

/* Shorthands for the two alternatives of the ph_u union. */
#define ph_itemlist	ph_u.phu_normal.phu_itemlist
#define ph_bitmap	ph_u.phu_notouch.phu_bitmap


/*
 * Userland copy of the list linkage stored at the start of a free pool item.
 * dump_page() reads it from kernel memory to walk a page's free-item chain.
 */
struct pool_item {
	/*
	 * NOTE(review): pi_magic is present only when this program is built
	 * with DIAGNOSTIC; that presumably has to match the inspected kernel,
	 * otherwise pi_list is read at the wrong offset -- confirm.
	 */
#ifdef DIAGNOSTIC
	u_int pi_magic;
#endif
#define PI_MAGIC 0xdeaddeadU
	/* Other entries use only this list entry */
	LIST_ENTRY(pool_item)	pi_list;
};

void kread(const void *kptr, void *ptr, size_t len, const char *msg) {
	if ((size_t)kvm_read(kd, (u_long)kptr, (char *)ptr, len) != len)
		errx(1, "kread %lx: %s: %s", (u_long)kptr, msg, kvm_geterr(kd));
}


/* kvm_close(kd); */

void dump_page(struct pool *pp, void *kpih, struct pool_item_header *pih, int part) {
	struct pool_item pi;
	char *p;

	if (verbose) fprintf(stderr, "page %p, time %u\n", pih->ph_page, pih->ph_time);
	switch (outform) {
		case out_hex:
			if (fprintf(out, "page %08p, time %u:\n", pih->ph_page, pih->ph_time) < 0) err(1, "fprintf");
			break;
	}

	p = pih->ph_page + pih->ph_off;
	if (pp->pr_itemoffset != 0)
		p += pp->pr_align - pp->pr_itemoffset;
	for (unsigned int i = 0; i < pp->pr_itemsperpage; i++) {
		int free = 0;

		if (part) {
			if (pp->pr_roflags & PR_NOTOUCH) {
				/*
				cant't do
				free = (pih->ph_bitmap[i / BITMAP_SIZE] & (1 << (i & BITMAP_MASK))) != 0;
				because not enough of *pih has been copied to userland
				*/
				pool_item_bitmap_t bitmap;

				kread((char *)kpih + offsetof(struct pool_item_header, ph_bitmap) + i / BITMAP_SIZE, &bitmap, sizeof(bitmap), "bitmap");
				free = (bitmap & (1 << (i & BITMAP_MASK))) != 0;
			} else {
				void *q;

				/* can't use LIST_FOREACH because q is a kernel address */
				for (q = LIST_FIRST(&pih->ph_itemlist); q != NULL; q = LIST_NEXT(&pi, pi_list)) {
					if (q == p) {
						free = 1;
						break;
					}
					kread(q, &pi, sizeof(pi), "item chain");
				}
			}
		}
		if (verbose) fprintf(stderr, "\titem %p%s\n", p, free ? " (free)" : "");
		if (!free) {
			kread(p, buf, itemsize, "item");
			switch (outform) {
				case out_null:
					break;
				case out_raw:
					if (fwrite(buf, itemsize, 1, out) != 1) err(1, "fwrite");
					break;
				case out_hex:
					if (fprintf(out, "item %08p:\n", p) < 0) err(1, "fprintf");
					for (int j = 0; j < itemsize; j += 16) {
						if (fprintf(out, "%04x  "
						            "%02x %02x %02x %02x %02x %02x %02x %02x  "
							    "%02x %02x %02x %02x %02x %02x %02x %02x\n",
						            j,
							    buf[j + 0], buf[j + 1], buf[j + 2], buf[j + 3], buf[j + 4], buf[j + 5], buf[j + 6], buf[j + 7],
							    buf[j + 8], buf[j + 9], buf[j + 10], buf[j + 11], buf[j + 12], buf[j + 13], buf[j + 14], buf[j + 15]
						   ) < 0) err(1, "fprintf");
					}
					if (fprintf(out, "\n") < 0) err(1, "fprintf");
					break;
				default: errx(1, "outform");
			}
		}
		p += itemsize;
	}
}

/*
 * Print a synopsis of the command line to stderr. The option letters
 * mirror the getopt() string used in main(); the single operand is the
 * name of the pool to dump.
 */
void usage(void) {
	fputs("usage: <program> [-M core] [-N system] [-o outfile] [-n | -r | -x] [-v] poolname\n", stderr);
}

/*
 * Find the pool whose name is given as the only operand and dump its items.
 *
 * Options:
 *   -M file  memory file handed to kvm_openfiles()
 *   -N file  kernel image handed to kvm_openfiles()
 *   -o file  write the dump to file instead of stdout
 *   -r       raw output (default)
 *   -x       hex dump output
 *   -n       read the items but print nothing
 *   -v       report progress on stderr
 *
 * Exits with status 1 on any error, 0 otherwise.
 */
int main(int argc, char *argv[]) {
	int ch;		/* int, not char: getopt() signals the end of options with -1 */
	int found = 0;

	TAILQ_HEAD(,pool) pool_head;
	struct pool pool;
	void *p;
	char name[32], *arg_name;
	struct pool_item_header pih;

	while ((ch = getopt(argc, argv, "M:N:o:rxnv")) != -1) {
		switch (ch) {
			case 'M':
				memf = optarg;
				break;
			case 'N':
				nlistf = optarg;
				break;
			case 'o':
				/* fopen() reports failure with NULL. */
				if ((out = fopen(optarg, "w")) == NULL)
					err(1, "fopen");
				break;
			case 'r':
				outform = out_raw;
				break;
			case 'x':
				outform = out_hex;
				break;
			case 'n':
				outform = out_null;
				break;
			case 'v':
				verbose = 1;
				break;
			default:
				usage();
				errx(1, "arguments");
		}
	}
	argc -= optind;
	argv += optind;
	if (argc < 1) errx(1, "missing pool name");
	arg_name = argv[0];
	if (argc > 1) errx(1, "extra arguments");

	if ((kd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, kvm_errbuf)) == NULL)
		errx(1, "kvm_openfiles: %s", kvm_errbuf);

	if (kvm_nlist(kd, namelist) != 0) errx(1, "namelist");

	kread((void *)namelist[0].n_value, &pool_head, sizeof(pool_head), "pool_head");

	/*
	 * Walk the kernel's pool list. p is a kernel address, so every pool
	 * is copied to userland before its name and successor are examined.
	 */
	for (p = TAILQ_FIRST(&pool_head); p != NULL; p = TAILQ_NEXT(&pool, pr_poollist)) {
		kread(p, &pool, sizeof(pool), "pool chain");
		kread(pool.pr_wchan, name, sizeof(name), "pr_wchan");
		if (strncmp(name, arg_name, sizeof(name)) == 0) {
			found = 1;
			break;
		}
	}
	if (!found) errx(1, "pool %s not found", arg_name);
	itemsize = pool.pr_size;
	if (verbose) fprintf(stderr, "itemsize=%u\n", itemsize);
	/* Only the hex dump formats items in 16-byte rows. */
	if (outform == out_hex && (itemsize % 16 != 0)) errx(1, "itemsize");
	if ((buf = malloc(itemsize)) == NULL) errx(1, "malloc");

	/*
	 * A pool without a current page must not stop the dump of its
	 * remaining pages.
	 * NOTE(review): pr_curpage may also be linked on one of the lists
	 * walked below, in which case that page is dumped twice -- confirm.
	 */
	p = pool.pr_curpage;
	if (p != NULL) {
		kread(p, &pih, sizeof(pih), "pr_curpage");
		dump_page(&pool, p, &pih, 1);
	}
	for (p = LIST_FIRST(&pool.pr_fullpages); p != NULL; p = LIST_NEXT(&pih, ph_pagelist)) {
		kread(p, &pih, sizeof(pih), "fullpage chain");
		dump_page(&pool, p, &pih, 0);
	}
	for (p = LIST_FIRST(&pool.pr_partpages); p != NULL; p = LIST_NEXT(&pih, ph_pagelist)) {
		kread(p, &pih, sizeof(pih), "partpage chain");
		dump_page(&pool, p, &pih, 1);
	}

	/* Surface write errors that are still sitting in the stdio buffer. */
	if (fflush(out) != 0) err(1, "fflush");
	return 0;
}


Home | Main Index | Thread Index | Old Index