diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 0c956040..f7fbe305 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -2207,6 +2207,20 @@ struct extent_hooks_s {
         considered.
 
+
+        stats.metadata_thp
+        (size_t)
+        r-
+        []
+
+        Number of transparent huge pages (THP) used for
+        metadata.  See stats.metadata and
+        opt.metadata_thp for details.
+
+
         stats.resident
@@ -2523,6 +2537,18 @@ struct extent_hooks_s {
         profiles.
 
+
+        stats.arenas.<i>.metadata_thp
+        (size_t)
+        r-
+        []
+
+        Number of transparent huge pages (THP) used for
+        metadata.  See opt.metadata_thp
+        for details.
+
+
         stats.arenas.<i>.resident
diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h
index 6cd11877..7b705c9b 100644
--- a/include/jemalloc/internal/base_externs.h
+++ b/include/jemalloc/internal/base_externs.h
@@ -13,7 +13,7 @@ extent_hooks_t *base_extent_hooks_set(base_t *base,
 void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment);
 extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base);
 void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated,
-    size_t *resident, size_t *mapped);
+    size_t *resident, size_t *mapped, size_t *n_thp);
 void base_prefork(tsdn_t *tsdn, base_t *base);
 void base_postfork_parent(tsdn_t *tsdn, base_t *base);
 void base_postfork_child(tsdn_t *tsdn, base_t *base);
diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h
index 18e227bd..b5421693 100644
--- a/include/jemalloc/internal/base_structs.h
+++ b/include/jemalloc/internal/base_structs.h
@@ -50,6 +50,8 @@ struct base_s {
 	size_t allocated;
 	size_t resident;
 	size_t mapped;
+	/* Number of THP regions touched. */
+	size_t n_thp;
 };
 
 #endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */
diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h
index a91c4cf5..a36feaff 100644
--- a/include/jemalloc/internal/ctl.h
+++ b/include/jemalloc/internal/ctl.h
@@ -48,6 +48,7 @@ typedef struct ctl_stats_s {
 	size_t allocated;
 	size_t active;
 	size_t metadata;
+	size_t metadata_thp;
 	size_t resident;
 	size_t mapped;
 	size_t retained;
diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h
index ab872e59..f19df374 100644
--- a/include/jemalloc/internal/stats.h
+++ b/include/jemalloc/internal/stats.h
@@ -142,6 +142,7 @@ typedef struct arena_stats_s {
 	atomic_zu_t base; /* Derived. */
 	atomic_zu_t internal;
 	atomic_zu_t resident; /* Derived. */
+	atomic_zu_t metadata_thp;
 	atomic_zu_t allocated_large; /* Derived. */
 	arena_stats_u64_t nmalloc_large; /* Derived. */
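With the declarations above in place, the new counters are reachable through the usual mallctl interface. A minimal sketch of a consumer, assuming jemalloc was configured with --enable-stats; an "epoch" write is required first so that cached statistics are refreshed before reading:

#include <stdio.h>
#include <stdint.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	/* Refresh cached statistics before reading them. */
	uint64_t epoch = 1;
	size_t sz = sizeof(epoch);
	if (mallctl("epoch", &epoch, &sz, &epoch, sz) != 0) {
		return 1;
	}

	/* Read the new top-level counter added by this change. */
	size_t metadata_thp;
	sz = sizeof(metadata_thp);
	if (mallctl("stats.metadata_thp", &metadata_thp, &sz, NULL, 0) != 0) {
		return 1;
	}
	printf("THP regions used for metadata: %zu\n", metadata_thp);
	return 0;
}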
diff --git a/src/arena.c b/src/arena.c
index 19aafaf0..18ed5aac 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -234,9 +234,9 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms,
 	    muzzy_decay_ms, nactive, ndirty, nmuzzy);
 
-	size_t base_allocated, base_resident, base_mapped;
+	size_t base_allocated, base_resident, base_mapped, metadata_thp;
 	base_stats_get(tsdn, arena->base, &base_allocated, &base_resident,
-	    &base_mapped);
+	    &base_mapped, &metadata_thp);
 
 	arena_stats_lock(tsdn, &arena->stats);
 
@@ -267,6 +267,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
 	arena_stats_accum_zu(&astats->base, base_allocated);
 	arena_stats_accum_zu(&astats->internal, arena_internal_get(arena));
+	arena_stats_accum_zu(&astats->metadata_thp, metadata_thp);
 	arena_stats_accum_zu(&astats->resident, base_resident +
 	    (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) +
 	    extents_npages_get(&arena->extents_dirty) +
diff --git a/src/base.c b/src/base.c
index 9cb02b63..609a445b 100644
--- a/src/base.c
+++ b/src/base.c
@@ -22,6 +22,11 @@ const char *metadata_thp_mode_names[] = {
 
 /******************************************************************************/
 
+static inline bool
+metadata_thp_madvise(void) {
+	return (metadata_thp_enabled() && thp_state_madvise);
+}
+
 static void *
 base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) {
 	void *addr;
@@ -101,7 +106,7 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
 		post_reentrancy(tsd);
 	}
 label_done:
-	if (metadata_thp_enabled() && thp_state_madvise) {
+	if (metadata_thp_madvise()) {
 		/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
 		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
 		    (size & HUGEPAGE_MASK) == 0);
@@ -120,6 +125,13 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr,
 	extent_binit(extent, addr, size, sn);
 }
 
+static bool
+base_is_single_block(base_t *base) {
+	assert(base->blocks != NULL &&
+	    (base->blocks->size & HUGEPAGE_MASK) == 0);
+	return (base->blocks->next == NULL);
+}
+
 static void *
 base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size,
     size_t alignment) {
@@ -155,12 +167,20 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent,
 		base->allocated += size;
 		/*
 		 * Add one PAGE to base_resident for every page boundary that is
-		 * crossed by the new allocation.
+		 * crossed by the new allocation.  Adjust n_thp similarly when
+		 * metadata_thp is enabled.
 		 */
 		base->resident += PAGE_CEILING((uintptr_t)addr + size) -
 		    PAGE_CEILING((uintptr_t)addr - gap_size);
 		assert(base->allocated <= base->resident);
 		assert(base->resident <= base->mapped);
+		if (metadata_thp_madvise() && (!base_is_single_block(base) ||
+		    opt_metadata_thp == metadata_thp_always)) {
+			base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
+			    - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
+			    LG_HUGEPAGE;
+			assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
+		}
 	}
 }
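The n_thp update above mirrors the existing resident accounting: it counts how many hugepage boundaries the bump allocation newly crosses, since each boundary crossed means one more hugepage region is touched. A standalone sketch of that arithmetic, assuming 2 MiB hugepages (LG_HUGEPAGE == 21); LG_HP, HP_MASK, and hp_ceiling() are hypothetical stand-ins for jemalloc's LG_HUGEPAGE/HUGEPAGE_CEILING():

#include <stdint.h>
#include <stddef.h>

#define LG_HP	21			/* Assumed 2 MiB hugepage. */
#define HP_SIZE	((uintptr_t)1 << LG_HP)
#define HP_MASK	(HP_SIZE - 1)

/* Round up to the next hugepage boundary (cf. HUGEPAGE_CEILING()). */
static uintptr_t
hp_ceiling(uintptr_t a) {
	return (a + HP_MASK) & ~HP_MASK;
}

/*
 * Hugepage regions newly spanned when the used range grows from
 * (addr - gap_size) to (addr + size), matching the n_thp update in
 * base_extent_bump_alloc_post().
 */
static size_t
thp_crossed(uintptr_t addr, size_t gap_size, size_t size) {
	return (size_t)((hp_ceiling(addr + size) -
	    hp_ceiling(addr - gap_size)) >> LG_HP);
}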
@@ -209,7 +229,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks,
 		return NULL;
 	}
 
-	if (metadata_thp_enabled() && thp_state_madvise) {
+	if (metadata_thp_madvise()) {
 		void *addr = (void *)block;
 		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
 		    (block_size & HUGEPAGE_MASK) == 0);
@@ -218,12 +238,15 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks,
 			/* Use hugepage for the new block. */
 			pages_huge(addr, block_size);
 		}
-		if (base != NULL && opt_metadata_thp == metadata_thp_auto) {
+		if (base != NULL && base_is_single_block(base) &&
+		    opt_metadata_thp == metadata_thp_auto) {
 			/* Make the first block THP lazily. */
 			base_block_t *first_block = base->blocks;
-			if (first_block->next == NULL) {
-				assert((first_block->size & HUGEPAGE_MASK) == 0);
-				pages_huge(first_block, first_block->size);
+			assert((first_block->size & HUGEPAGE_MASK) == 0);
+			pages_huge(first_block, first_block->size);
+			if (config_stats) {
+				assert(base->n_thp == 0);
+				base->n_thp += first_block->size >> LG_HUGEPAGE;
 			}
 		}
 	}
@@ -264,8 +287,15 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
 		base->allocated += sizeof(base_block_t);
 		base->resident += PAGE_CEILING(sizeof(base_block_t));
 		base->mapped += block->size;
+		if (metadata_thp_madvise()) {
+			assert(!base_is_single_block(base));
+			assert(base->n_thp > 0);
+			base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
+			    LG_HUGEPAGE;
+		}
 		assert(base->allocated <= base->resident);
 		assert(base->resident <= base->mapped);
+		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
 	}
 	return &block->extent;
 }
@@ -307,8 +337,12 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
 		base->allocated = sizeof(base_block_t);
 		base->resident = PAGE_CEILING(sizeof(base_block_t));
 		base->mapped = block->size;
+		base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
+		    metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t))
+		    >> LG_HUGEPAGE : 0;
 		assert(base->allocated <= base->resident);
 		assert(base->resident <= base->mapped);
+		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
 	}
 	base_extent_bump_alloc_post(tsdn, base, &block->extent, gap_size, base,
 	    base_size);
@@ -403,7 +437,7 @@ base_alloc_extent(tsdn_t *tsdn, base_t *base) {
 
 void
 base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
-    size_t *mapped) {
+    size_t *mapped, size_t *n_thp) {
 	cassert(config_stats);
 
 	malloc_mutex_lock(tsdn, &base->mtx);
@@ -412,6 +446,7 @@ base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
 	*allocated = base->allocated;
 	*resident = base->resident;
 	*mapped = base->mapped;
+	*n_thp = base->n_thp;
 	malloc_mutex_unlock(tsdn, &base->mtx);
 }
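For context on what pages_huge()/pages_nohuge() do with these blocks: on Linux with transparent hugepages in "madvise" mode they come down to madvise(2) hints, which is why n_thp counts regions that were hinted rather than pages the kernel is guaranteed to back with THP. A hedged sketch of that mapping, with error handling and portability guards omitted (thp_hint() is a hypothetical name; jemalloc's actual calls live in pages.c):

#include <sys/mman.h>
#include <stdbool.h>
#include <stddef.h>

/* Returns true on error, mirroring jemalloc's pages_*() convention. */
static bool
thp_hint(void *addr, size_t size, bool enable) {
	/* MADV_HUGEPAGE requests THP; MADV_NOHUGEPAGE blocks kernel defrag. */
	return madvise(addr, size,
	    enable ? MADV_HUGEPAGE : MADV_NOHUGEPAGE) != 0;
}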
diff --git a/src/ctl.c b/src/ctl.c
index ace10b02..a2f3837a 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -183,6 +183,7 @@ CTL_PROTO(stats_arenas_i_muzzy_nmadvise)
 CTL_PROTO(stats_arenas_i_muzzy_purged)
 CTL_PROTO(stats_arenas_i_base)
 CTL_PROTO(stats_arenas_i_internal)
+CTL_PROTO(stats_arenas_i_metadata_thp)
 CTL_PROTO(stats_arenas_i_tcache_bytes)
 CTL_PROTO(stats_arenas_i_resident)
 INDEX_PROTO(stats_arenas_i)
@@ -192,6 +193,7 @@ CTL_PROTO(stats_background_thread_num_threads)
 CTL_PROTO(stats_background_thread_num_runs)
 CTL_PROTO(stats_background_thread_run_interval)
 CTL_PROTO(stats_metadata)
+CTL_PROTO(stats_metadata_thp)
 CTL_PROTO(stats_resident)
 CTL_PROTO(stats_mapped)
 CTL_PROTO(stats_retained)
@@ -476,6 +478,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = {
 	{NAME("muzzy_purged"),	CTL(stats_arenas_i_muzzy_purged)},
 	{NAME("base"),		CTL(stats_arenas_i_base)},
 	{NAME("internal"),	CTL(stats_arenas_i_internal)},
+	{NAME("metadata_thp"),	CTL(stats_arenas_i_metadata_thp)},
 	{NAME("tcache_bytes"),	CTL(stats_arenas_i_tcache_bytes)},
 	{NAME("resident"),	CTL(stats_arenas_i_resident)},
 	{NAME("small"),		CHILD(named, stats_arenas_i_small)},
@@ -514,6 +517,7 @@ static const ctl_named_node_t stats_node[] = {
 	{NAME("allocated"),	CTL(stats_allocated)},
 	{NAME("active"),	CTL(stats_active)},
 	{NAME("metadata"),	CTL(stats_metadata)},
+	{NAME("metadata_thp"),	CTL(stats_metadata_thp)},
 	{NAME("resident"),	CTL(stats_resident)},
 	{NAME("mapped"),	CTL(stats_mapped)},
 	{NAME("retained"),	CTL(stats_retained)},
@@ -775,6 +779,8 @@ MUTEX_PROF_ARENA_MUTEXES
 			    &astats->astats.internal);
 			accum_atomic_zu(&sdstats->astats.resident,
 			    &astats->astats.resident);
+			accum_atomic_zu(&sdstats->astats.metadata_thp,
+			    &astats->astats.metadata_thp);
 		} else {
 			assert(atomic_load_zu(
 			    &astats->astats.internal, ATOMIC_RELAXED) == 0);
@@ -940,6 +946,8 @@ ctl_refresh(tsdn_t *tsdn) {
 		    &ctl_sarena->astats->astats.base, ATOMIC_RELAXED) +
 		    atomic_load_zu(&ctl_sarena->astats->astats.internal,
 		    ATOMIC_RELAXED);
+		ctl_stats->metadata_thp = atomic_load_zu(
+		    &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED);
 		ctl_stats->resident = atomic_load_zu(
 		    &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED);
 		ctl_stats->mapped = atomic_load_zu(
@@ -2464,6 +2472,7 @@ CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t)
 CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t)
 CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t)
 CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t)
+CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t)
 CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t)
 CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t)
 CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t)
@@ -2519,6 +2528,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_base,
 CTL_RO_CGEN(config_stats, stats_arenas_i_internal,
     atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal,
     ATOMIC_RELAXED), size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp,
+    atomic_load_zu(&arenas_i(mib[2])->astats->astats.metadata_thp,
+    ATOMIC_RELAXED), size_t)
 CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes,
     atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes,
     ATOMIC_RELAXED), size_t)
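The per-arena node registered above can be read with the MIB variant of the API, which avoids re-parsing the name on every lookup. A sketch, again assuming --enable-stats; mib[2] is the arena-index component of "stats.arenas.<i>.metadata_thp":

#include <stddef.h>
#include <jemalloc/jemalloc.h>

static int
arena_metadata_thp(unsigned arena_ind, size_t *out) {
	size_t mib[4];
	size_t miblen = sizeof(mib) / sizeof(mib[0]);

	if (mallctlnametomib("stats.arenas.0.metadata_thp", mib,
	    &miblen) != 0) {
		return 1;
	}
	mib[2] = arena_ind;	/* Select the arena of interest. */

	size_t sz = sizeof(*out);
	return mallctlbymib(mib, miblen, out, &sz, NULL, 0);
}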
diff --git a/src/stats.c b/src/stats.c
index e1a3f8cf..cbeb923d 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -401,7 +401,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
 	const char *dss;
 	ssize_t dirty_decay_ms, muzzy_decay_ms;
 	size_t page, pactive, pdirty, pmuzzy, mapped, retained;
-	size_t base, internal, resident;
+	size_t base, internal, resident, metadata_thp;
 	uint64_t dirty_npurge, dirty_nmadvise, dirty_purged;
 	uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged;
 	size_t small_allocated;
@@ -613,6 +613,15 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
 		    "internal:                %12zu\n", internal);
 	}
 
+	CTL_M2_GET("stats.arenas.0.metadata_thp", i, &metadata_thp, size_t);
+	if (json) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\t\"metadata_thp\": %zu,\n", metadata_thp);
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "metadata_thp:            %12zu\n", metadata_thp);
+	}
+
 	CTL_M2_GET("stats.arenas.0.tcache_bytes", i, &tcache_bytes, size_t);
 	if (json) {
 		malloc_cprintf(write_cb, cbopaque,
@@ -1007,13 +1016,15 @@
 static void
 stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque,
     bool json, bool merged, bool destroyed, bool unmerged, bool bins,
     bool large, bool mutex) {
-	size_t allocated, active, metadata, resident, mapped, retained;
+	size_t allocated, active, metadata, metadata_thp, resident, mapped,
+	    retained;
 	size_t num_background_threads;
 	uint64_t background_thread_num_runs, background_thread_run_interval;
 
 	CTL_GET("stats.allocated", &allocated, size_t);
 	CTL_GET("stats.active", &active, size_t);
 	CTL_GET("stats.metadata", &metadata, size_t);
+	CTL_GET("stats.metadata_thp", &metadata_thp, size_t);
 	CTL_GET("stats.resident", &resident, size_t);
 	CTL_GET("stats.mapped", &mapped, size_t);
 	CTL_GET("stats.retained", &retained, size_t);
@@ -1046,6 +1057,8 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque,
 		    "\t\t\t\"active\": %zu,\n", active);
 		malloc_cprintf(write_cb, cbopaque,
 		    "\t\t\t\"metadata\": %zu,\n", metadata);
+		malloc_cprintf(write_cb, cbopaque,
+		    "\t\t\t\"metadata_thp\": %zu,\n", metadata_thp);
 		malloc_cprintf(write_cb, cbopaque,
 		    "\t\t\t\"resident\": %zu,\n", resident);
 		malloc_cprintf(write_cb, cbopaque,
@@ -1082,9 +1095,10 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque,
 		    "\t\t}%s\n", (merged || unmerged || destroyed) ? "," : "");
 	} else {
 		malloc_cprintf(write_cb, cbopaque,
-		    "Allocated: %zu, active: %zu, metadata: %zu,"
+		    "Allocated: %zu, active: %zu, metadata: %zu (n_thp %zu),"
 		    " resident: %zu, mapped: %zu, retained: %zu\n",
-		    allocated, active, metadata, resident, mapped, retained);
+		    allocated, active, metadata, metadata_thp, resident,
+		    mapped, retained);
 
 		if (have_background_thread && num_background_threads > 0) {
 			malloc_cprintf(write_cb, cbopaque,
diff --git a/test/unit/base.c b/test/unit/base.c
index 7fa24ac0..6b792cf2 100644
--- a/test/unit/base.c
+++ b/test/unit/base.c
@@ -28,22 +28,28 @@ static extent_hooks_t hooks_not_null = {
 
 TEST_BEGIN(test_base_hooks_default) {
 	base_t *base;
-	size_t allocated0, allocated1, resident, mapped;
+	size_t allocated0, allocated1, resident, mapped, n_thp;
 	tsdn_t *tsdn = tsd_tsdn(tsd_fetch());
 	base = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default);
 
 	if (config_stats) {
-		base_stats_get(tsdn, base, &allocated0, &resident, &mapped);
+		base_stats_get(tsdn, base, &allocated0, &resident, &mapped,
+		    &n_thp);
 		assert_zu_ge(allocated0, sizeof(base_t),
 		    "Base header should count as allocated");
+		if (opt_metadata_thp == metadata_thp_always) {
+			assert_zu_gt(n_thp, 0,
+			    "Base should have at least 1 THP.");
+		}
 	}
 
 	assert_ptr_not_null(base_alloc(tsdn, base, 42, 1),
 	    "Unexpected base_alloc() failure");
 
 	if (config_stats) {
-		base_stats_get(tsdn, base, &allocated1, &resident, &mapped);
+		base_stats_get(tsdn, base, &allocated1, &resident, &mapped,
+		    &n_thp);
 		assert_zu_ge(allocated1 - allocated0, 42,
 		    "At least 42 bytes were allocated by base_alloc()");
 	}
@@ -55,7 +61,7 @@ TEST_END
 TEST_BEGIN(test_base_hooks_null) {
 	extent_hooks_t hooks_orig;
 	base_t *base;
-	size_t allocated0, allocated1, resident, mapped;
+	size_t allocated0, allocated1, resident, mapped, n_thp;
 
 	extent_hooks_prep();
 	try_dalloc = false;
@@ -71,16 +77,22 @@ TEST_BEGIN(test_base_hooks_null) {
 	assert_ptr_not_null(base, "Unexpected base_new() failure");
 
 	if (config_stats) {
-		base_stats_get(tsdn, base, &allocated0, &resident, &mapped);
+		base_stats_get(tsdn, base, &allocated0, &resident, &mapped,
+		    &n_thp);
 		assert_zu_ge(allocated0, sizeof(base_t),
 		    "Base header should count as allocated");
+		if (opt_metadata_thp == metadata_thp_always) {
+			assert_zu_gt(n_thp, 0,
+			    "Base should have at least 1 THP.");
+		}
 	}
 
 	assert_ptr_not_null(base_alloc(tsdn, base, 42, 1),
 	    "Unexpected base_alloc() failure");
 
 	if (config_stats) {
-		base_stats_get(tsdn, base, &allocated1, &resident, &mapped);
+		base_stats_get(tsdn, base, &allocated1, &resident, &mapped,
+		    &n_thp);
 		assert_zu_ge(allocated1 - allocated0, 42,
 		    "At least 42 bytes were allocated by base_alloc()");
 	}
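End to end, the new output can be exercised by enabling metadata THP through the malloc_conf application symbol and dumping statistics. This sketch assumes a Linux kernel with THP available in "madvise" mode and a jemalloc build that includes the metadata_thp option this series adds:

#include <stdlib.h>
#include <jemalloc/jemalloc.h>

/* Compile-time jemalloc configuration, read at allocator startup. */
const char *malloc_conf = "metadata_thp:always";

int
main(void) {
	/* Allocate enough objects to force some base metadata growth. */
	for (int i = 0; i < 100000; i++) {
		if (malloc(64) == NULL) {
			return 1;
		}
	}
	/*
	 * The plain-text report now prints "metadata: ... (n_thp ...)",
	 * and each per-arena section includes a "metadata_thp" line.
	 */
	malloc_stats_print(NULL, NULL, NULL);
	return 0;
}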