From be8e59f5a64ef775c9694aee0d6a87d92336d303 Mon Sep 17 00:00:00 2001 From: Ben Maurer Date: Sat, 5 Apr 2014 15:59:08 -0700 Subject: [PATCH] Don't dereference chunk->arena in free() hot path When you call free() we load chunk->arena even though that data isn't used on the tcache hot path. In profiling some FB applications, I found that ~30% of the dTLB misses in the free() function come from this line. With 4 MB chunks, the arena_chunk_t->map is ~ 32 KB (1024 pages in the chunk, four 8-byte pointers in arena_chunk_map_t). This means there's only a 1/8 chance of the page containing chunk->arena also containing the map bits. --- include/jemalloc/internal/arena.h | 11 ++++------- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- src/jemalloc.c | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 9d000c03..b899888d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -495,8 +495,7 @@ prof_ctx_t *arena_prof_ctx_get(const void *ptr); void arena_prof_ctx_set(const void *ptr, size_t usize, prof_ctx_t *ctx); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); size_t arena_salloc(const void *ptr, bool demote); -void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, - bool try_tcache); +void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -1022,13 +1021,11 @@ arena_salloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) +arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache) { size_t pageind, mapbits; tcache_t *tcache; - assert(arena != NULL); - assert(chunk->arena == arena); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); @@ -1043,7 +1040,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool 
try_tcache) binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tcache, ptr, binind); } else - arena_dalloc_small(arena, chunk, ptr, pageind); + arena_dalloc_small(chunk->arena, chunk, ptr, pageind); } else { size_t size = arena_mapbits_large_size_get(chunk, pageind); @@ -1053,7 +1050,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache) tcache_get(false)) != NULL) { tcache_dalloc_large(tcache, ptr, size); } else - arena_dalloc_large(arena, chunk, ptr); + arena_dalloc_large(chunk->arena, chunk, ptr); } } # endif /* JEMALLOC_ARENA_INLINE_B */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 574bbb14..9c79ae00 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -895,7 +895,7 @@ idalloct(void *ptr, bool try_tcache) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr, try_tcache); + arena_dalloc(chunk, ptr, try_tcache); else huge_dalloc(ptr, true); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 204778bc..558dbb20 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2103,7 +2103,7 @@ a0free(void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr, false); + arena_dalloc(chunk, ptr, false); else huge_dalloc(ptr, true); }