Subject: percpu storage allocator
To: None <tech-kern@netbsd.org>
From: YAMAMOTO Takashi <yamt@mwd.biglobe.ne.jp>
List: tech-kern
Date: 11/12/2007 20:41:35
--NextPart-20071112203611-0619600
Content-Type: Text/Plain; charset=us-ascii
hi,
the attached files contain an implementation of percpu storage.
(and some users of it.)
the patch is against vmlocking branch.
any comments?
YAMAMOTO Takashi
--NextPart-20071112203611-0619600
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="percpu.h"
/* $NetBSD$ */
/*-
* Copyright (c)2007 YAMAMOTO Takashi,
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _SYS_PERCPU_H_
#define _SYS_PERCPU_H_

#include <sys/types.h>

struct cpu_info;

/* opaque handle for one percpu storage allocation */
typedef struct percpu percpu_t;

/* one cpu's view of the percpu storage; embedded in struct cpu_data */
typedef struct percpu_cpu {
	size_t pcc_size;	/* size of this cpu's storage block */
	void *pcc_data;		/* the storage block itself */
} percpu_cpu_t;

void percpu_init(void);			/* subsystem init, once at boot */
void percpu_init_cpu(struct cpu_info *);	/* attach storage to a cpu */
percpu_t *percpu_alloc(size_t);
void percpu_free(percpu_t *, size_t);	/* size must match the alloc */
void *percpu_getptr(percpu_t *);	/* caller: preemption disabled */

/*
 * remote access to other cpus' data: bracket percpu_getptr_remote()
 * calls with percpu_traverse_enter()/percpu_traverse_exit().
 */
void percpu_traverse_enter(void);
void percpu_traverse_exit(void);
void *percpu_getptr_remote(percpu_t *, struct cpu_info *);

#endif /* _SYS_PERCPU_H_ */
--NextPart-20071112203611-0619600
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="subr_percpu.c"
/* $NetBSD$ */
/*-
* Copyright (c)2007 YAMAMOTO Takashi,
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD$");
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/rwlock.h>
#include <sys/vmem.h>
#include <sys/xcall.h>
static krwlock_t percpu_swap_lock;
static kmutex_t percpu_allocation_lock;
static vmem_t *percpu_offset_arena;
static unsigned int percpu_nextoff;
#define PERCPU_QUANTUM_SIZE (ALIGNBYTES + 1)
/*
 * cpu_percpu: return the given cpu's percpu storage descriptor.
 */
static percpu_cpu_t *
cpu_percpu(struct cpu_info *ci)
{
	percpu_cpu_t * const pcc = &ci->ci_data.cpu_percpu;

	return pcc;
}
/*
 * percpu_offset: recover the storage offset encoded in a percpu_t handle.
 *
 * A percpu_t is not a real pointer; it is the vmem-allocated offset
 * cast to a pointer type (see percpu_alloc()).
 */
static unsigned int
percpu_offset(percpu_t *pc)
{
	const uintptr_t off = (uintptr_t)pc;

	return (unsigned int)off;
}
/*
 * percpu_cpu_swap: crosscall handler for percpu_cpu_enlarge
 *
 * Runs on the target cpu (p1): installs the larger storage block (p2)
 * in place of the cpu's current one, copying the old contents across.
 * The replaced block is handed back to the caller through p2 so the
 * caller can free it.
 *
 * => taking percpu_swap_lock as writer excludes percpu_traverse_*
 *    readers while the pointers change.
 * => splhigh() keeps interrupt-context users on this cpu from
 *    observing a half-switched percpu_cpu_t.
 */
static void
percpu_cpu_swap(void *p1, void *p2)
{
	struct cpu_info * const ci = p1;
	percpu_cpu_t * const newpcc = p2;
	percpu_cpu_t * const pcc = cpu_percpu(ci);

	rw_enter(&percpu_swap_lock, RW_WRITER);
	/* unless anyone has beaten us... */
	if (newpcc->pcc_size > pcc->pcc_size) {
		percpu_cpu_t tmp;
		int s;

		s = splhigh();
		/* copy data to new storage */
		memcpy(newpcc->pcc_data, pcc->pcc_data, pcc->pcc_size);
		/* swap */
		tmp = *pcc;
		*pcc = *newpcc;
		splx(s);
		/* hand the old storage back to the caller for freeing */
		*newpcc = tmp;
	}
	rw_exit(&percpu_swap_lock);
}
/*
 * percpu_cpu_enlarge: ensure that percpu_cpu_t of each cpus have enough space
 *
 * Allocates a zeroed block of 'size' bytes per cpu and has each cpu
 * swap it in via percpu_cpu_swap(), then frees whichever block comes
 * back (the old one, or ours if the swap was skipped).
 */
static void
percpu_cpu_enlarge(size_t size)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		percpu_cpu_t pcc;

		pcc.pcc_data = kmem_zalloc(size, KM_SLEEP);
		pcc.pcc_size = size;
		if (!mp_online) {
			/* secondary cpus not running yet; swap in-line */
			percpu_cpu_swap(ci, &pcc);
		} else {
			uint64_t where;

			/* have the target cpu perform the swap itself */
			where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci);
			xc_wait(where);
		}
		/*
		 * percpu_cpu_swap() returns the replaced storage in pcc.
		 * If a concurrent enlarge already grew this cpu's storage
		 * past 'size', no swap took place and pcc still holds our
		 * fresh block of exactly 'size' bytes — hence "<=", not
		 * "<" (the latter would fire spuriously in that race).
		 */
		KASSERT(pcc.pcc_size <= size);
		if (pcc.pcc_data != NULL)
			kmem_free(pcc.pcc_data, pcc.pcc_size);
	}
}
/*
 * percpu_backend_alloc: vmem import callback for percpu_offset_arena
 *
 * Grows the global offset space by 'size' bytes and enlarges every
 * cpu's backing storage to cover it, then returns the start of the
 * newly imported offset range.
 *
 * => must be able to sleep (kmem_zalloc/xcall), so VM_NOSLEEP fails.
 * => NOTE(review): if percpu_nextoff starts at 0, the very first
 *    import returns offset 0, which equals VMEM_ADDR_NULL and is
 *    indistinguishable from failure (and percpu_alloc() would hand
 *    out a NULL percpu_t) — verify percpu_nextoff's initial value.
 */
static vmem_addr_t
percpu_backend_alloc(vmem_t *dummy, vmem_size_t size, vmem_size_t *resultsize,
    vm_flag_t vmflags)
{
	unsigned int offset;
	unsigned int nextoff;

	KASSERT(dummy == NULL);
	if ((vmflags & VM_NOSLEEP) != 0)
		return VMEM_ADDR_NULL;

	/* carve the new range out of the global offset space */
	mutex_enter(&percpu_allocation_lock);
	offset = percpu_nextoff;
	percpu_nextoff = nextoff = percpu_nextoff + size;
	mutex_exit(&percpu_allocation_lock);

	/* grow every cpu's storage to cover offsets [0, nextoff) */
	percpu_cpu_enlarge(nextoff);

	*resultsize = size;
	return (vmem_addr_t)offset;
}
/*
 * percpu_init: subsystem initialization
 *
 * => called once at boot, before any percpu_alloc().
 */
void
percpu_init(void)
{

	rw_init(&percpu_swap_lock);
	mutex_init(&percpu_allocation_lock, MUTEX_DEFAULT, IPL_NONE);
	/*
	 * storage offsets are allocated from this arena;
	 * percpu_backend_alloc imports fresh offset space in
	 * chunks of 32 quanta.
	 */
	percpu_offset_arena = vmem_create("percpu", 0, 0, PERCPU_QUANTUM_SIZE,
	    percpu_backend_alloc, NULL, NULL, PERCPU_QUANTUM_SIZE * 32,
	    VM_SLEEP, IPL_NONE);
}
/*
 * percpu_init_cpu: attach percpu backing storage to a newly
 * configured cpu, covering everything allocated so far.
 */
void
percpu_init_cpu(struct cpu_info *ci)
{
	percpu_cpu_t * const pcc = cpu_percpu(ci);
	const size_t size = percpu_nextoff;	/* XXX unlocked read */

	pcc->pcc_size = size;
	if (size != 0) {
		/* zero-filled, like percpu_cpu_enlarge's blocks */
		pcc->pcc_data = kmem_zalloc(size, KM_SLEEP);
	}
}
/*
 * percpu_alloc: allocate percpu data
 *
 * => may sleep (VM_SLEEP).
 * => the returned handle is an offset into each cpu's storage,
 *    disguised as a pointer; it is decoded by percpu_offset().
 */
percpu_t *
percpu_alloc(size_t size)
{
	const unsigned int off =
	    vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT);

	return (percpu_t *)(uintptr_t)off;
}
/*
 * percpu_free: free percpu data
 *
 * => 'size' must match the size passed to percpu_alloc().
 * => only the offset range is returned to the arena for reuse;
 *    the per-cpu backing storage is not shrunk here.
 */
void
percpu_free(percpu_t *pc, size_t size)
{

	vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size);
}
/*
 * percpu_getptr:
 *
 * => called with preemption disabled
 * => returns the calling cpu's view of the data; the pointer is only
 *    meaningful while the caller stays on this cpu, and the storage
 *    can be replaced by percpu_cpu_swap() once the caller lets go.
 */
void *
percpu_getptr(percpu_t *pc)
{

	return percpu_getptr_remote(pc, curcpu());
}
/*
* percpu_traverse_enter, percpu_traverse_exit, percpu_getptr_remote:
* helpers to access remote cpu's percpu data.
*
* => called in thread context.
* => typical usage would be:
*
* sum = 0;
* percpu_traverse_enter();
* for (CPU_INFO_FOREACH(cii, ci)) {
* unsigned int *p = percpu_getptr_remote(pc, ci);
* sum += *p;
* }
* percpu_traverse_exit();
*/
/*
 * percpu_traverse_enter: take the traversal read lock, blocking
 * percpu_cpu_swap() so remote cpus' storage pointers stay stable.
 */
void
percpu_traverse_enter(void)
{

	rw_enter(&percpu_swap_lock, RW_READER);
}
/*
 * percpu_traverse_exit: release the traversal read lock taken by
 * percpu_traverse_enter(); remote pointers are invalid afterwards.
 */
void
percpu_traverse_exit(void)
{

	rw_exit(&percpu_swap_lock);
}
/*
 * percpu_getptr_remote: return the given cpu's view of the percpu data.
 *
 * => caller must hold the traversal lock (percpu_traverse_enter()).
 */
void *
percpu_getptr_remote(percpu_t *pc, struct cpu_info *ci)
{
	char * const base = cpu_percpu(ci)->pcc_data;

	return base + percpu_offset(pc);
}
--NextPart-20071112203611-0619600
Content-Type: Text/Plain; charset=us-ascii
Content-Disposition: attachment; filename="a.diff"
Index: sys/cpu_data.h
===================================================================
RCS file: /cvsroot/src/sys/sys/cpu_data.h,v
retrieving revision 1.7.6.10
diff -u -p -r1.7.6.10 cpu_data.h
--- sys/cpu_data.h 1 Nov 2007 21:58:24 -0000 1.7.6.10
+++ sys/cpu_data.h 12 Nov 2007 11:34:24 -0000
@@ -46,6 +46,7 @@ struct lwp;
#include <sys/sched.h> /* for schedstate_percpu */
#include <sys/condvar.h>
+#include <sys/percpu.h>
/*
* MI per-cpu data
@@ -90,6 +91,7 @@ struct cpu_data {
kmutex_t cpu_uarea_lock; /* uarea alloc lock */
u_int cpu_uarea_cnt; /* count of free uareas */
vaddr_t cpu_uarea_list; /* free uareas */
+ percpu_cpu_t cpu_percpu; /* per-cpu data */
};
/* compat definitions */
Index: sys/mbuf.h
===================================================================
RCS file: /cvsroot/src/sys/sys/mbuf.h,v
retrieving revision 1.135.2.1
diff -u -p -r1.135.2.1 mbuf.h
--- sys/mbuf.h 1 Sep 2007 12:56:51 -0000 1.135.2.1
+++ sys/mbuf.h 12 Nov 2007 11:34:24 -0000
@@ -80,6 +80,7 @@
#endif
#include <sys/pool.h>
#include <sys/queue.h>
+#include <sys/percpu.h>
/* For offsetof() */
#if defined(_KERNEL) || defined(_STANDALONE)
@@ -110,15 +111,33 @@ struct mowner {
char mo_name[16]; /* owner name (fxp0) */
char mo_descr[16]; /* owner description (input) */
LIST_ENTRY(mowner) mo_link; /* */
- u_long mo_claims; /* # of small mbuf claimed */
- u_long mo_releases; /* # of small mbuf released */
- u_long mo_cluster_claims; /* # of M_CLUSTER mbuf claimed */
- u_long mo_cluster_releases; /* # of M_CLUSTER mbuf released */
- u_long mo_ext_claims; /* # of M_EXT mbuf claimed */
- u_long mo_ext_releases; /* # of M_EXT mbuf released */
+ percpu_t *mo_counters;
};
-#define MOWNER_INIT(x, y) { x, y, { NULL, NULL }, 0, 0, 0, 0, 0, 0 }
+#define MOWNER_INIT(x, y) { .mo_name = x, .mo_descr = y }
+
+enum mowner_counter_index {
+ MOWNER_COUNTER_CLAIMS, /* # of small mbuf claimed */
+ MOWNER_COUNTER_RELEASES, /* # of small mbuf released */
+ MOWNER_COUNTER_CLUSTER_CLAIMS, /* # of M_CLUSTER mbuf claimed */
+ MOWNER_COUNTER_CLUSTER_RELEASES,/* # of M_CLUSTER mbuf released */
+ MOWNER_COUNTER_EXT_CLAIMS, /* # of M_EXT mbuf claimed */
+ MOWNER_COUNTER_EXT_RELEASES, /* # of M_EXT mbuf released */
+
+ MOWNER_COUNTER_NCOUNTERS,
+};
+
+struct mowner_counter {
+ u_long mc_counter[MOWNER_COUNTER_NCOUNTERS];
+};
+
+/* userland-exported version */
+struct mowner_user {
+ char mo_name[16]; /* owner name (fxp0) */
+ char mo_descr[16]; /* owner description (input) */
+ LIST_ENTRY(mowner) mo_link; /* */
+ u_long mo_counter[MOWNER_COUNTER_NCOUNTERS]; /* counters */
+};
/*
* Macros for type conversion
@@ -374,65 +393,28 @@ do { \
#ifdef MBUFTRACE
/*
- * mbuf allocation tracing macros
- *
+ * mbuf allocation tracing
*/
-#define _MOWNERINIT(m, type) \
- ((m)->m_owner = &unknown_mowners[(type)], (m)->m_owner->mo_claims++)
-
-#define _MOWNERREF(m, flags) do { \
- if ((flags) & M_EXT) \
- (m)->m_owner->mo_ext_claims++; \
- if ((flags) & M_CLUSTER) \
- (m)->m_owner->mo_cluster_claims++; \
-} while (/* CONSTCOND */ 0)
-
-#define MOWNERREF(m, flags) MBUFLOCK( _MOWNERREF((m), (flags)); );
-
-#define _MOWNERREVOKE(m, all, flags) do { \
- if ((flags) & M_EXT) \
- (m)->m_owner->mo_ext_releases++; \
- if ((flags) & M_CLUSTER) \
- (m)->m_owner->mo_cluster_releases++; \
- if (all) { \
- (m)->m_owner->mo_releases++; \
- (m)->m_owner = &revoked_mowner; \
- } \
-} while (/* CONSTCOND */ 0)
-
-#define _MOWNERCLAIM(m, mowner) do { \
- (m)->m_owner = (mowner); \
- (mowner)->mo_claims++; \
- if ((m)->m_flags & M_EXT) \
- (mowner)->mo_ext_claims++; \
- if ((m)->m_flags & M_CLUSTER) \
- (mowner)->mo_cluster_claims++; \
-} while (/* CONSTCOND */ 0)
-
-#define MCLAIM(m, mowner) \
- MBUFLOCK( \
- if ((m)->m_owner != (mowner) && (mowner) != NULL) { \
- _MOWNERREVOKE((m), 1, (m)->m_flags); \
- _MOWNERCLAIM((m), (mowner)); \
- } \
- )
-
-#define MOWNER_ATTACH(mo) LIST_INSERT_HEAD(&mowners, (mo), mo_link)
-#define MOWNER_DETACH(mo) LIST_REMOVE((mo), mo_link)
-#define MBUFTRACE_ASSERT(cond) KASSERT(cond)
+void mowner_init(struct mbuf *, int);
+void mowner_ref(struct mbuf *, int);
+void m_claim(struct mbuf *, struct mowner *);
+void mowner_revoke(struct mbuf *, bool, int);
+void mowner_attach(struct mowner *);
+void mowner_detach(struct mowner *);
+void m_claimm(struct mbuf *, struct mowner *);
#else
-#define _MOWNERINIT(m, type) do { } while (/* CONSTCOND */ 0)
-#define _MOWNERREF(m, flags) do { } while (/* CONSTCOND */ 0)
-#define MOWNERREF(m, flags) do { } while (/* CONSTCOND */ 0)
-#define _MOWNERREVOKE(m, all, flags) do { } while (/* CONSTCOND */ 0)
-#define _MOWNERCLAIM(m, mowner) do { } while (/* CONSTCOND */ 0)
-#define MCLAIM(m, mowner) do { } while (/* CONSTCOND */ 0)
-#define MOWNER_ATTACH(mo) do { } while (/* CONSTCOND */ 0)
-#define MOWNER_DETACH(mo) do { } while (/* CONSTCOND */ 0)
+#define mowner_init(m, type) do { } while (/* CONSTCOND */ 0)
+#define mowner_ref(m, flags) do { } while (/* CONSTCOND */ 0)
+#define mowner_revoke(m, all, flags) do { } while (/* CONSTCOND */ 0)
+#define m_claim(m, mowner) do { } while (/* CONSTCOND */ 0)
+#define mowner_attach(mo) do { } while (/* CONSTCOND */ 0)
+#define mowner_detach(mo) do { } while (/* CONSTCOND */ 0)
#define m_claimm(m, mo) do { } while (/* CONSTCOND */ 0)
-#define MBUFTRACE_ASSERT(cond) do { } while (/* CONSTCOND */ 0)
#endif
+#define MCLAIM(m, mo) m_claim((m), (mo))
+#define MOWNER_ATTACH(mo) mowner_attach(mo)
+#define MOWNER_DETACH(mo) mowner_detach(mo)
/*
* mbuf allocation/deallocation macros:
@@ -447,39 +429,8 @@ do { \
* If 'how' is M_WAIT, these macros (and the corresponding functions)
* are guaranteed to return successfully.
*/
-#define MGET(m, how, type) \
-MBUFLOCK( \
- (m) = pool_cache_get(mb_cache, \
- (how) == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0); \
- if (m) { \
- mbstat.m_mtypes[type]++; \
- _MOWNERINIT((m), (type)); \
- (m)->m_type = (type); \
- (m)->m_next = (struct mbuf *)NULL; \
- (m)->m_nextpkt = (struct mbuf *)NULL; \
- (m)->m_data = (m)->m_dat; \
- (m)->m_flags = 0; \
- } \
-)
-
-#define MGETHDR(m, how, type) \
-MBUFLOCK( \
- (m) = pool_cache_get(mb_cache, \
- (how) == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0); \
- if (m) { \
- mbstat.m_mtypes[type]++; \
- _MOWNERINIT((m), (type)); \
- (m)->m_type = (type); \
- (m)->m_next = (struct mbuf *)NULL; \
- (m)->m_nextpkt = (struct mbuf *)NULL; \
- (m)->m_data = (m)->m_pktdat; \
- (m)->m_flags = M_PKTHDR; \
- (m)->m_pkthdr.rcvif = NULL; \
- (m)->m_pkthdr.csum_flags = 0; \
- (m)->m_pkthdr.csum_data = 0; \
- SLIST_INIT(&(m)->m_pkthdr.tags); \
- } \
-)
+#define MGET(m, how, type) m = m_get((how), (type))
+#define MGETHDR(m, how, type) m = m_gethdr((how), (type))
#if defined(_KERNEL)
#define _M_
@@ -522,7 +473,7 @@ do { \
(n)->m_ext.ext_prevref = (o); \
(o)->m_ext.ext_nextref = (n); \
(n)->m_ext.ext_nextref->m_ext.ext_prevref = (n); \
- _MOWNERREF((n), (n)->m_flags); \
+ mowner_ref((n), (n)->m_flags); \
MCLREFDEBUGN((n), __FILE__, __LINE__); \
} while (/* CONSTCOND */ 0)
@@ -550,15 +501,12 @@ do { \
*/
#define _MCLGET(m, pool_cache, size, how) \
do { \
- MBUFLOCK( \
- (m)->m_ext.ext_buf = \
- pool_cache_get_paddr((pool_cache), \
- (how) == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : 0, \
- &(m)->m_ext.ext_paddr); \
- if ((m)->m_ext.ext_buf != NULL) \
- _MOWNERREF((m), M_EXT|M_CLUSTER); \
- ); \
+ (m)->m_ext.ext_buf = \
+ pool_cache_get_paddr((pool_cache), \
+ (how) == M_WAIT ? (PR_WAITOK|PR_LIMITFAIL) : 0, \
+ &(m)->m_ext.ext_paddr); \
if ((m)->m_ext.ext_buf != NULL) { \
+ mowner_ref((m), M_EXT|M_CLUSTER); \
(m)->m_data = (m)->m_ext.ext_buf; \
(m)->m_flags = ((m)->m_flags & ~M_EXTCOPYFLAGS) | \
M_EXT|M_CLUSTER|M_EXT_RW; \
@@ -588,7 +536,7 @@ do { \
(m)->m_ext.ext_arg = NULL; \
(m)->m_ext.ext_type = mbtypes[(m)->m_type]; \
MCLINITREFERENCE(m); \
- MOWNERREF((m), M_EXT); \
+ mowner_ref((m), M_EXT); \
} \
} while (/* CONSTCOND */ 0)
@@ -601,13 +549,13 @@ do { \
(m)->m_ext.ext_arg = (arg); \
(m)->m_ext.ext_type = (type); \
MCLINITREFERENCE(m); \
- MOWNERREF((m), M_EXT); \
+ mowner_ref((m), M_EXT); \
} while (/* CONSTCOND */ 0)
#define MEXTREMOVE(m) \
do { \
+ mowner_revoke((m), 0, (m)->m_flags); \
int _ms_ = splvm(); /* MBUFLOCK */ \
- _MOWNERREVOKE((m), 0, (m)->m_flags); \
m_ext_free(m, FALSE); \
splx(_ms_); \
(m)->m_flags &= ~M_EXTCOPYFLAGS; \
@@ -633,12 +581,12 @@ do { \
* Place the successor, if any, in n.
*/
#define MFREE(m, n) \
+ mowner_revoke((m), 1, (m)->m_flags); \
+ mbstat_type_add((m)->m_type, -1); \
MBUFLOCK( \
- mbstat.m_mtypes[(m)->m_type]--; \
if ((m)->m_flags & M_PKTHDR) \
m_tag_delete_chain((m), NULL); \
(n) = (m)->m_next; \
- _MOWNERREVOKE((m), 1, m->m_flags); \
if ((m)->m_flags & M_EXT) { \
m_ext_free(m, TRUE); \
} else { \
@@ -759,7 +707,8 @@ do { \
/* change mbuf to new type */
#define MCHTYPE(m, t) \
do { \
- MBUFLOCK(mbstat.m_mtypes[(m)->m_type]--; mbstat.m_mtypes[t]++;); \
+ mbstat_type_add((m)->m_type, -1); \
+ mbstat_type_add(t, 1); \
(m)->m_type = t; \
} while (/* CONSTCOND */ 0)
@@ -847,6 +796,10 @@ struct mbstat {
u_short m_mtypes[256]; /* type specific mbuf allocations */
};
+struct mbstat_cpu {
+ u_int m_mtypes[256]; /* type specific mbuf allocations */
+};
+
/*
* Mbuf sysctl variables.
*/
@@ -913,9 +866,6 @@ void m_adj(struct mbuf *, int);
int m_apply(struct mbuf *, int, int,
int (*)(void *, void *, unsigned int), void *);
void m_cat(struct mbuf *,struct mbuf *);
-#ifdef MBUFTRACE
-void m_claimm(struct mbuf *, struct mowner *);
-#endif
void m_clget(struct mbuf *, int);
int m_mballoc(int, int);
void m_copyback(struct mbuf *, int, int, const void *);
@@ -932,6 +882,9 @@ void m_move_pkthdr(struct mbuf *to, stru
static __inline u_int m_length(struct mbuf *) __unused;
static __inline void m_ext_free(struct mbuf *, bool) __unused;
+/* Statistics */
+void mbstat_type_add(int, int);
+
/* Packet tag routines */
struct m_tag *m_tag_get(int, int, int);
void m_tag_free(struct m_tag *);
Index: sys/systm.h
===================================================================
RCS file: /cvsroot/src/sys/sys/systm.h,v
retrieving revision 1.196.2.5
diff -u -p -r1.196.2.5 systm.h
--- sys/systm.h 23 Oct 2007 20:17:27 -0000 1.196.2.5
+++ sys/systm.h 12 Nov 2007 11:34:24 -0000
@@ -93,6 +93,7 @@ extern const char *rootspec; /* how root
extern int ncpu; /* number of CPUs configured */
extern int ncpuonline; /* number of CPUs online */
+extern bool mp_online; /* secondary processors are started */
extern const char hexdigits[]; /* "0123456789abcdef" in subr_prf.c */
extern const char HEXDIGITS[]; /* "0123456789ABCDEF" in subr_prf.c */
Index: kern/init_main.c
===================================================================
RCS file: /cvsroot/src/sys/kern/init_main.c,v
retrieving revision 1.299.2.28
diff -u -p -r1.299.2.28 init_main.c
--- kern/init_main.c 5 Nov 2007 17:08:31 -0000 1.299.2.28
+++ kern/init_main.c 12 Nov 2007 11:34:24 -0000
@@ -117,6 +117,7 @@ __KERNEL_RCSID(0, "$NetBSD: init_main.c,
#include <sys/exec.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
+#include <sys/sysctl.h>
#include <sys/reboot.h>
#include <sys/user.h>
#include <sys/sysctl.h>
@@ -294,6 +295,8 @@ main(void)
kmem_init();
+ percpu_init();
+
/* Initialize the extent manager. */
extent_init();
@@ -317,12 +320,6 @@ main(void)
/* Initialize the buffer cache */
bufinit();
- /*
- * Initialize mbuf's. Do this now because we might attempt to
- * allocate mbufs or mbuf clusters during autoconfiguration.
- */
- mbinit();
-
/* Initialize sockets. */
soinit();
@@ -360,6 +357,12 @@ main(void)
error = mi_cpu_attach(curcpu());
KASSERT(error == 0);
+ /*
+ * Initialize mbuf's. Do this now because we might attempt to
+ * allocate mbufs or mbuf clusters during autoconfiguration.
+ */
+ mbinit();
+
/* Initialize the sysctl subsystem. */
sysctl_init();
@@ -662,13 +665,14 @@ main(void)
/* Create the aiodone daemon kernel thread. */
if (workqueue_create(&uvm.aiodone_queue, "aiodoned",
- uvm_aiodone_worker, NULL, PRI_VM, IPL_NONE, WQ_MPSAFE))
+ uvm_aiodone_worker, NULL, PRI_VM, IPL_NONE, WQ_MPSAFE | WQ_PERCPU))
panic("fork aiodoned");
vmem_rehash_start();
-#if defined(MULTIPROCESSOR)
/* Boot the secondary processors. */
+ mp_online = true;
+#if defined(MULTIPROCESSOR)
cpu_boot_secondary_processors();
#endif
Index: kern/kern_cpu.c
===================================================================
RCS file: /cvsroot/src/sys/kern/kern_cpu.c,v
retrieving revision 1.2.2.9
diff -u -p -r1.2.2.9 kern_cpu.c
--- kern/kern_cpu.c 1 Nov 2007 21:58:16 -0000 1.2.2.9
+++ kern/kern_cpu.c 12 Nov 2007 11:34:24 -0000
@@ -98,6 +98,7 @@ const struct cdevsw cpuctl_cdevsw = {
kmutex_t cpu_lock;
int ncpu;
int ncpuonline;
+bool mp_online;
int
mi_cpu_attach(struct cpu_info *ci)
@@ -122,6 +123,7 @@ mi_cpu_attach(struct cpu_info *ci)
else
ci->ci_data.cpu_onproc = ci->ci_data.cpu_idlelwp;
+ percpu_init_cpu(ci);
softint_init(ci);
xc_init_cpu(ci);
pool_cache_cpu_init(ci);
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvsroot/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.120.2.3
diff -u -p -r1.120.2.3 uipc_mbuf.c
--- kern/uipc_mbuf.c 1 Nov 2007 21:05:21 -0000 1.120.2.3
+++ kern/uipc_mbuf.c 12 Nov 2007 11:34:24 -0000
@@ -84,6 +84,7 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
+#include <sys/percpu.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
@@ -126,6 +127,8 @@ static const char mclpool_warnmsg[] =
MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
+static percpu_t *mbstat_percpu;
+
#ifdef MBUFTRACE
struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
struct mowner unknown_mowners[] = {
@@ -171,6 +174,8 @@ mbinit(void)
*/
pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60);
+ mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu));
+
/*
* Set a low water mark for both mbufs and clusters. This should
* help ensure that they can be allocated in a memory starvation
@@ -250,6 +255,31 @@ sysctl_kern_mbuf(SYSCTLFN_ARGS)
}
#ifdef MBUFTRACE
+static void
+mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user)
+{
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci;
+
+ memset(mo_user, 0, sizeof(*mo_user));
+ KASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name));
+ KASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr));
+ memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name));
+ memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr));
+
+ percpu_traverse_enter();
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ struct mowner_counter *mc;
+ int i;
+
+ mc = percpu_getptr_remote(mo->mo_counters, ci);
+ for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) {
+ mo_user->mo_counter[i] += mc->mc_counter[i];
+ }
+ }
+ percpu_traverse_exit();
+}
+
static int
sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
{
@@ -263,16 +293,21 @@ sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
return (EPERM);
LIST_FOREACH(mo, &mowners, mo_link) {
+ struct mowner_user mo_user;
+
+ mowner_convert_to_user(mo, &mo_user);
+
if (oldp != NULL) {
- if (*oldlenp - len < sizeof(*mo)) {
+ if (*oldlenp - len < sizeof(mo_user)) {
error = ENOMEM;
break;
}
- error = copyout(mo, (char *)oldp + len, sizeof(*mo));
+ error = copyout(&mo_user, (char *)oldp + len,
+ sizeof(mo_user));
if (error)
break;
}
- len += sizeof(*mo);
+ len += sizeof(mo_user);
}
if (error == 0)
@@ -282,6 +317,40 @@ sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
}
#endif /* MBUFTRACE */
+static void
+mbstat_convert_to_user(struct mbstat *mbs)
+{
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci;
+
+ memset(mbs, 0, sizeof(*mbs));
+ mbs->m_drain = mbstat.m_drain;
+ percpu_traverse_enter();
+ for (CPU_INFO_FOREACH(cii, ci)) {
+ struct mbstat_cpu *mbsc;
+ int i;
+
+ mbsc = percpu_getptr_remote(mbstat_percpu, ci);
+ for (i = 0; i < __arraycount(mbs->m_mtypes); i++) {
+ mbs->m_mtypes[i] += mbsc->m_mtypes[i];
+ }
+ }
+ percpu_traverse_exit();
+}
+
+static int
+sysctl_kern_mbuf_stats(SYSCTLFN_ARGS)
+{
+ struct sysctlnode node;
+ struct mbstat mbs;
+
+ mbstat_convert_to_user(&mbs);
+ node = *rnode;
+ node.sysctl_data = &mbs;
+ node.sysctl_size = sizeof(mbs);
+ return sysctl_lookup(SYSCTLFN_CALL(&node));
+}
+
SYSCTL_SETUP(sysctl_kern_mbuf_setup, "sysctl kern.mbuf subtree setup")
{
@@ -331,7 +400,7 @@ SYSCTL_SETUP(sysctl_kern_mbuf_setup, "sy
CTLFLAG_PERMANENT,
CTLTYPE_STRUCT, "stats",
SYSCTL_DESCR("mbuf allocation statistics"),
- NULL, 0, &mbstat, sizeof(mbstat),
+ sysctl_kern_mbuf_stats, 0, NULL, 0,
CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
#ifdef MBUFTRACE
sysctl_createv(clog, 0, NULL, NULL,
@@ -407,8 +476,20 @@ m_get(int nowait, int type)
{
struct mbuf *m;
- MGET(m, nowait, type);
- return (m);
+ m = pool_cache_get(mb_cache,
+ nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0);
+ if (m == NULL)
+ return NULL;
+
+ mbstat_type_add(type, 1);
+ mowner_init(m, type);
+ m->m_type = type;
+ m->m_next = NULL;
+ m->m_nextpkt = NULL;
+ m->m_data = m->m_dat;
+ m->m_flags = 0;
+
+ return m;
}
struct mbuf *
@@ -416,8 +497,18 @@ m_gethdr(int nowait, int type)
{
struct mbuf *m;
- MGETHDR(m, nowait, type);
- return (m);
+ m = m_get(nowait, type);
+ if (m == NULL)
+ return NULL;
+
+ m->m_data = m->m_pktdat;
+ m->m_flags = M_PKTHDR;
+ m->m_pkthdr.rcvif = NULL;
+ m->m_pkthdr.csum_flags = 0;
+ m->m_pkthdr.csum_data = 0;
+ SLIST_INIT(&m->m_pkthdr.tags);
+
+ return m;
}
struct mbuf *
@@ -1484,3 +1575,127 @@ nextchain:
}
}
#endif /* defined(DDB) */
+
+void
+mbstat_type_add(int type, int diff)
+{
+ struct mbstat_cpu *mb;
+ int s;
+
+ s = splvm();
+ mb = percpu_getptr(mbstat_percpu);
+ mb->m_mtypes[type] += diff;
+ splx(s);
+}
+
+#if defined(MBUFTRACE)
+void
+mowner_attach(struct mowner *mo)
+{
+
+ KASSERT(mo->mo_counters == NULL);
+ mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter));
+
+ /* XXX lock */
+ LIST_INSERT_HEAD(&mowners, mo, mo_link);
+}
+
+void
+mowner_detach(struct mowner *mo)
+{
+
+ KASSERT(mo->mo_counters != NULL);
+
+ /* XXX lock */
+ LIST_REMOVE(mo, mo_link);
+
+ percpu_free(mo->mo_counters, sizeof(struct mowner_counter));
+ mo->mo_counters = NULL;
+}
+
+static struct mowner_counter *
+mowner_counter(struct mowner *mo)
+{
+
+ return percpu_getptr(mo->mo_counters);
+}
+
+void
+mowner_init(struct mbuf *m, int type)
+{
+ struct mowner_counter *mc;
+ struct mowner *mo;
+ int s;
+
+ m->m_owner = mo = &unknown_mowners[type];
+ s = splvm();
+ mc = mowner_counter(mo);
+ mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
+ splx(s);
+}
+
+void
+mowner_ref(struct mbuf *m, int flags)
+{
+ struct mowner *mo = m->m_owner;
+ struct mowner_counter *mc;
+ int s;
+
+ s = splvm();
+ mc = mowner_counter(mo);
+ if ((flags & M_EXT) != 0)
+ mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
+ if ((flags & M_CLUSTER) != 0)
+ mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
+ splx(s);
+}
+
+void
+mowner_revoke(struct mbuf *m, bool all, int flags)
+{
+ struct mowner *mo = m->m_owner;
+ struct mowner_counter *mc;
+ int s;
+
+ s = splvm();
+ mc = mowner_counter(mo);
+ if ((flags & M_EXT) != 0)
+ mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++;
+ if ((flags & M_CLUSTER) != 0)
+ mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++;
+ if (all)
+ mc->mc_counter[MOWNER_COUNTER_RELEASES]++;
+ splx(s);
+ if (all)
+ m->m_owner = &revoked_mowner;
+}
+
+static void
+mowner_claim(struct mbuf *m, struct mowner *mo)
+{
+ struct mowner_counter *mc;
+ int flags = m->m_flags;
+ int s;
+
+ s = splvm();
+ mc = mowner_counter(mo);
+ mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
+ if ((flags & M_EXT) != 0)
+ mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
+ if ((flags & M_CLUSTER) != 0)
+ mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
+ splx(s);
+ m->m_owner = mo;
+}
+
+void
+m_claim(struct mbuf *m, struct mowner *mo)
+{
+
+ if (m->m_owner == mo || mo == NULL)
+ return;
+
+ mowner_revoke(m, true, m->m_flags);
+ mowner_claim(m, mo);
+}
+#endif /* defined(MBUFTRACE) */
Index: conf/files
===================================================================
RCS file: /cvsroot/src/sys/conf/files,v
retrieving revision 1.834.2.11
diff -u -p -r1.834.2.11 files
--- conf/files 23 Oct 2007 20:17:04 -0000 1.834.2.11
+++ conf/files 12 Nov 2007 11:34:24 -0000
@@ -1378,6 +1378,7 @@ file kern/subr_hash.c
file kern/subr_kmem.c
file kern/subr_lockdebug.c
file kern/subr_log.c
+file kern/subr_percpu.c
file kern/subr_pool.c
file kern/subr_prf.c
file kern/subr_prf2.c
--NextPart-20071112203611-0619600--