NetBSD-Bugs archive


Re: lib/50791: NetBSD's malloc has performance issues



On Thu, Feb 18, 2016 at 04:15:00PM +0000, Andreas Gustafsson wrote:
>  I wonder how much faster release builds would be with a newer
>  jemalloc in libc...

I somehow doubt that it makes a big difference. The provided trace is
somewhat atypical, as roughly half of the allocations are between 64KB
and 256KB. For jemalloc, those are large allocations, so runs for them
are carved out of the larger 1MB chunks. What happens is that we use a
single arena, both in the single-threaded case and when the process
always runs on the same CPU. The search for the next free run of
enough pages is done in chunk address order, so most allocations have
to scan most of the chunks. Keeping the chunk tree ordered by the
estimated size of the largest free run avoids this worst case at the
cost of shuffling things around a bit more often. A proof of concept
is attached; a standalone sketch of the core idea follows below. I
haven't done any exhaustive testing for this...
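
To make the idea easier to see outside the diff, here is a minimal
standalone sketch (the example_* names are made up for illustration,
they are not the libc identifiers): key the red-black tree of chunks
by the size of the largest free run, break ties by address, and
descend the tree best-fit style instead of walking the chunks in
address order.

#include <sys/tree.h>
#include <stddef.h>
#include <stdint.h>

typedef struct example_chunk_s {
	RB_ENTRY(example_chunk_s) link;
	size_t max_frun_npages;	/* size of the largest free run, in pages */
} example_chunk_t;

static int
example_chunk_comp(example_chunk_t *a, example_chunk_t *b)
{

	/* Primary key: largest free run, so a best-fit lookup works. */
	if (a->max_frun_npages < b->max_frun_npages)
		return -1;
	if (a->max_frun_npages > b->max_frun_npages)
		return 1;
	/* Tie-breaker: chunk address, so the key stays unique. */
	if ((uintptr_t)a < (uintptr_t)b)
		return -1;
	if ((uintptr_t)a > (uintptr_t)b)
		return 1;
	return 0;
}

RB_HEAD(example_chunk_tree_s, example_chunk_s);
RB_GENERATE_STATIC(example_chunk_tree_s, example_chunk_s, link,
    example_chunk_comp)

/*
 * Return the chunk with the smallest max_frun_npages that is still
 * >= need_npages, or NULL if no chunk can satisfy the request.  This
 * is an O(log n) descent instead of an address-ordered walk over all
 * chunks.
 */
static example_chunk_t *
example_best_fit(struct example_chunk_tree_s *tree, size_t need_npages)
{
	example_chunk_t *cur, *best;

	best = NULL;
	for (cur = RB_ROOT(tree); cur != NULL;) {
		if (cur->max_frun_npages < need_npages) {
			/* Too small; a large enough run can only be right. */
			cur = RB_RIGHT(cur, link);
		} else {
			/* Candidate; look for a tighter fit on the left. */
			best = cur;
			cur = RB_LEFT(cur, link);
		}
	}
	return best;
}

Because max_frun_npages is now part of the tree key, a chunk has to be
removed from the tree before the field changes and reinserted
afterwards, which is the RB_REMOVE/RB_INSERT dance the patch does in
arena_run_alloc() and arena_run_dalloc().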

Joerg
Index: jemalloc.c
===================================================================
RCS file: /home/joerg/repo/netbsd/src/lib/libc/stdlib/jemalloc.c,v
retrieving revision 1.39
diff -u -p -r1.39 jemalloc.c
--- jemalloc.c	24 Jan 2016 21:56:43 -0000	1.39
+++ jemalloc.c	19 Feb 2016 03:11:25 -0000
@@ -1661,6 +1661,11 @@ arena_chunk_comp(arena_chunk_t *a, arena
 	assert(a != NULL);
 	assert(b != NULL);
 
+	if (a->max_frun_npages < b->max_frun_npages)
+		return -1;
+	if (a->max_frun_npages > b->max_frun_npages)
+		return 1;
+
 	if ((uintptr_t)a < (uintptr_t)b)
 		return (-1);
 	else if (a == b)
@@ -1912,9 +1917,6 @@ arena_chunk_alloc(arena_t *arena)
 
 		chunk->arena = arena;
 
-		/* LINTED */
-		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
-
 		/*
 		 * Claim that no pages are in use, since the header is merely
 		 * overhead.
@@ -1925,6 +1927,10 @@ arena_chunk_alloc(arena_t *arena)
 		    arena_chunk_header_npages;
 		chunk->min_frun_ind = arena_chunk_header_npages;
 
+		/* Insert now that max_frun_npages is up-to-date. */
+		/* LINTED */
+		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
+
 		/*
 		 * Initialize enough of the map to support one maximal free run.
 		 */
@@ -1970,7 +1976,7 @@ arena_chunk_dealloc(arena_t *arena, aren
 static arena_run_t *
 arena_run_alloc(arena_t *arena, size_t size)
 {
-	arena_chunk_t *chunk;
+	arena_chunk_t *chunk, *chunk_tmp;
 	arena_run_t *run;
 	unsigned need_npages, limit_pages, compl_need_npages;
 
@@ -1986,14 +1992,30 @@ arena_run_alloc(arena_t *arena, size_t s
 	limit_pages = chunk_npages - arena_chunk_header_npages;
 	compl_need_npages = limit_pages - need_npages;
 	/* LINTED */
-	RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
+	for (;;) {
+		chunk_tmp = RB_ROOT(&arena->chunks);
+		chunk = NULL;
+		while (chunk_tmp) {
+			if (chunk_tmp->max_frun_npages == need_npages) {
+				chunk = chunk_tmp;
+				break;
+			}
+			if (chunk_tmp->max_frun_npages < need_npages) {
+				chunk_tmp = RB_RIGHT(chunk_tmp, link);
+				continue;
+			}
+			chunk = chunk_tmp;
+			chunk_tmp = RB_LEFT(chunk, link);
+		}
+		if (chunk == NULL)
+			break;
 		/*
 		 * Avoid searching this chunk if there are not enough
 		 * contiguous free pages for there to possibly be a large
 		 * enough free run.
 		 */
-		if (chunk->pages_used <= compl_need_npages &&
-		    need_npages <= chunk->max_frun_npages) {
+		assert(need_npages <= chunk->max_frun_npages);
+		{
 			arena_chunk_map_t *mapelm;
 			unsigned i;
 			unsigned max_frun_npages = 0;
@@ -2031,7 +2053,9 @@ arena_run_alloc(arena_t *arena, size_t s
 			 * chunk->min_frun_ind was already reset above (if
 			 * necessary).
 			 */
+			RB_REMOVE(arena_chunk_tree_s, &arena->chunks, chunk);
 			chunk->max_frun_npages = max_frun_npages;
+			RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
 		}
 	}
 
@@ -2114,8 +2138,11 @@ arena_run_dalloc(arena_t *arena, arena_r
 		assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
 	}
 
-	if (chunk->map[run_ind].npages > chunk->max_frun_npages)
+	if (chunk->map[run_ind].npages > chunk->max_frun_npages) {
+		RB_REMOVE(arena_chunk_tree_s, &arena->chunks, chunk);
 		chunk->max_frun_npages = chunk->map[run_ind].npages;
+		RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
+	}
 	if (run_ind < chunk->min_frun_ind)
 		chunk->min_frun_ind = run_ind;
 


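If anyone wants to poke at this without the original trace, a crude
synthetic driver in the spirit of the workload described above (lots
of allocations between 64KB and 256KB, so everything takes the
large-allocation path) is enough to exercise the run search; the slot
and iteration counts below are arbitrary.

#include <stdio.h>
#include <stdlib.h>

#define NSLOTS	256
#define NITERS	200000

int
main(void)
{
	static void *slots[NSLOTS];	/* zero-initialized, so free(NULL) is fine */
	size_t size;
	int i, idx;

	srand(12345);
	for (i = 0; i < NITERS; i++) {
		idx = rand() % NSLOTS;
		/* Sizes in the 64KB-256KB range mentioned above. */
		size = 64 * 1024 + (size_t)(rand() % (192 * 1024));
		free(slots[idx]);
		slots[idx] = malloc(size);
		if (slots[idx] == NULL) {
			perror("malloc");
			return 1;
		}
	}
	for (i = 0; i < NSLOTS; i++)
		free(slots[i]);
	return 0;
}

Timing this against libc with and without the patch should show
whether the address-ordered walk is really the bottleneck for this
kind of trace.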