Change opt.metadata_thp to [disabled,auto,always].
To avoid the high RSS caused by THP + low usage arena (i.e. THP becomes a significant percentage), added a new "auto" option which will only start using THP after a base allocator used up the first THP region. Starting from the second hugepage (in a single arena), "auto" behaves the same as "always", i.e. madvise hugepage right away.
This commit is contained in:
parent
ea91dfa58e
commit
47b20bb654
@ -919,13 +919,15 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
|
||||
<varlistentry id="opt.metadata_thp">
|
||||
<term>
|
||||
<mallctl>opt.metadata_thp</mallctl>
|
||||
(<type>bool</type>)
|
||||
(<type>const char *</type>)
|
||||
<literal>r-</literal>
|
||||
</term>
|
||||
<listitem><para>If true, allow jemalloc to use transparent huge page
|
||||
(THP) for internal metadata (see <link
|
||||
linkend="stats.metadata">stats.metadata</link> for details). This
|
||||
option is disabled by default.</para></listitem>
|
||||
<listitem><para>Controls whether to allow jemalloc to use transparent
|
||||
huge page (THP) for internal metadata (see <link
|
||||
linkend="stats.metadata">stats.metadata</link>). <quote>always</quote>
|
||||
allows such usage. <quote>auto</quote> uses no THP initially, but may
|
||||
begin to do so when metadata usage reaches certain level. The default
|
||||
is <quote>disabled</quote>.</para></listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry id="opt.retain">
|
||||
|
@ -1,7 +1,8 @@
|
||||
#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H
|
||||
#define JEMALLOC_INTERNAL_BASE_EXTERNS_H
|
||||
|
||||
extern bool opt_metadata_thp;
|
||||
extern metadata_thp_mode_t opt_metadata_thp;
|
||||
extern const char *metadata_thp_mode_names[];
|
||||
|
||||
base_t *b0get(void);
|
||||
base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
|
||||
|
@ -6,4 +6,8 @@ base_ind_get(const base_t *base) {
|
||||
return base->ind;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
metadata_thp_enabled(void) {
|
||||
return (opt_metadata_thp != metadata_thp_disabled);
|
||||
}
|
||||
#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */
|
||||
|
@ -4,6 +4,21 @@
|
||||
typedef struct base_block_s base_block_t;
|
||||
typedef struct base_s base_t;
|
||||
|
||||
#define METADATA_THP_DEFAULT false
|
||||
#define METADATA_THP_DEFAULT metadata_thp_disabled
|
||||
|
||||
typedef enum {
|
||||
metadata_thp_disabled = 0,
|
||||
/*
|
||||
* Lazily enable hugepage for metadata. To avoid high RSS caused by THP
|
||||
* + low usage arena (i.e. THP becomes a significant percentage), the
|
||||
* "auto" option only starts using THP after a base allocator used up
|
||||
* the first THP region. Starting from the second hugepage (in a single
|
||||
* arena), "auto" behaves the same as "always", i.e. madvise hugepage
|
||||
* right away.
|
||||
*/
|
||||
metadata_thp_auto = 1,
|
||||
metadata_thp_always = 2,
|
||||
metadata_thp_mode_limit = 3
|
||||
} metadata_thp_mode_t;
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */
|
||||
|
46
src/base.c
46
src/base.c
@ -12,7 +12,13 @@
|
||||
|
||||
static base_t *b0;
|
||||
|
||||
bool opt_metadata_thp = METADATA_THP_DEFAULT;
|
||||
metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;
|
||||
|
||||
const char *metadata_thp_mode_names[] = {
|
||||
"disabled",
|
||||
"auto",
|
||||
"always"
|
||||
};
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
@ -24,7 +30,7 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size)
|
||||
|
||||
/* We use hugepage sizes regardless of opt_metadata_thp. */
|
||||
assert(size == HUGEPAGE_CEILING(size));
|
||||
size_t alignment = opt_metadata_thp ? HUGEPAGE : PAGE;
|
||||
size_t alignment = metadata_thp_enabled() ? HUGEPAGE : PAGE;
|
||||
if (extent_hooks == &extent_hooks_default) {
|
||||
addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
|
||||
} else {
|
||||
@ -36,12 +42,6 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size)
|
||||
post_reentrancy(tsd);
|
||||
}
|
||||
|
||||
if (addr != NULL && opt_metadata_thp && thp_state_madvise) {
|
||||
assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
|
||||
(size & HUGEPAGE_MASK) == 0);
|
||||
pages_huge(addr, size);
|
||||
}
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
@ -101,7 +101,7 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
|
||||
post_reentrancy(tsd);
|
||||
}
|
||||
label_done:
|
||||
if (opt_metadata_thp && thp_state_madvise) {
|
||||
if (metadata_thp_enabled() && thp_state_madvise) {
|
||||
/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
|
||||
assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
|
||||
(size & HUGEPAGE_MASK) == 0);
|
||||
@ -181,8 +181,8 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent,
|
||||
* On success a pointer to the initialized base_block_t header is returned.
|
||||
*/
|
||||
static base_block_t *
|
||||
base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind,
|
||||
pszind_t *pind_last, size_t *extent_sn_next, size_t size,
|
||||
base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks,
|
||||
unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size,
|
||||
size_t alignment) {
|
||||
alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
|
||||
size_t usize = ALIGNMENT_CEILING(size, alignment);
|
||||
@ -208,6 +208,26 @@ base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind,
|
||||
if (block == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (metadata_thp_enabled() && thp_state_madvise) {
|
||||
void *addr = (void *)block;
|
||||
assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
|
||||
(block_size & HUGEPAGE_MASK) == 0);
|
||||
/* base == NULL indicates this is a new base. */
|
||||
if (base != NULL || opt_metadata_thp == metadata_thp_always) {
|
||||
/* Use hugepage for the new block. */
|
||||
pages_huge(addr, block_size);
|
||||
}
|
||||
if (base != NULL && opt_metadata_thp == metadata_thp_auto) {
|
||||
/* Make the first block THP lazily. */
|
||||
base_block_t *first_block = base->blocks;
|
||||
if (first_block->next == NULL) {
|
||||
assert((first_block->size & HUGEPAGE_MASK) == 0);
|
||||
pages_huge(first_block, first_block->size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*pind_last = sz_psz2ind(block_size);
|
||||
block->size = block_size;
|
||||
block->next = NULL;
|
||||
@ -231,7 +251,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
|
||||
* called.
|
||||
*/
|
||||
malloc_mutex_unlock(tsdn, &base->mtx);
|
||||
base_block_t *block = base_block_alloc(tsdn, extent_hooks,
|
||||
base_block_t *block = base_block_alloc(tsdn, base, extent_hooks,
|
||||
base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
|
||||
alignment);
|
||||
malloc_mutex_lock(tsdn, &base->mtx);
|
||||
@ -259,7 +279,7 @@ base_t *
|
||||
base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
|
||||
pszind_t pind_last = 0;
|
||||
size_t extent_sn_next = 0;
|
||||
base_block_t *block = base_block_alloc(tsdn, extent_hooks, ind,
|
||||
base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind,
|
||||
&pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
|
||||
if (block == NULL) {
|
||||
return NULL;
|
||||
|
@ -1570,7 +1570,8 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
|
||||
|
||||
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
|
||||
CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
|
||||
CTL_RO_NL_GEN(opt_metadata_thp, opt_metadata_thp, bool)
|
||||
CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp],
|
||||
const char *)
|
||||
CTL_RO_NL_GEN(opt_retain, opt_retain, bool)
|
||||
CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
|
||||
CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
|
||||
|
@ -1055,7 +1055,23 @@ malloc_conf_init(void) {
|
||||
if (opt_abort_conf && had_conf_error) {
|
||||
malloc_abort_invalid_conf();
|
||||
}
|
||||
CONF_HANDLE_BOOL(opt_metadata_thp, "metadata_thp")
|
||||
if (strncmp("metadata_thp", k, klen) == 0) {
|
||||
int i;
|
||||
bool match = false;
|
||||
for (i = 0; i < metadata_thp_mode_limit; i++) {
|
||||
if (strncmp(metadata_thp_mode_names[i],
|
||||
v, vlen) == 0) {
|
||||
opt_metadata_thp = i;
|
||||
match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!match) {
|
||||
malloc_conf_error("Invalid conf value",
|
||||
k, klen, v, vlen);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
CONF_HANDLE_BOOL(opt_retain, "retain")
|
||||
if (strncmp("dss", k, klen) == 0) {
|
||||
int i;
|
||||
|
@ -418,7 +418,7 @@ os_overcommits_proc(void) {
|
||||
static void
|
||||
init_thp_state(void) {
|
||||
if (!have_madvise_huge) {
|
||||
if (opt_metadata_thp && opt_abort) {
|
||||
if (metadata_thp_enabled() && opt_abort) {
|
||||
malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
|
||||
abort();
|
||||
}
|
||||
|
@ -802,11 +802,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
|
||||
}
|
||||
OPT_WRITE_BOOL(abort, ",")
|
||||
OPT_WRITE_BOOL(abort_conf, ",")
|
||||
OPT_WRITE_BOOL(metadata_thp, ",")
|
||||
OPT_WRITE_BOOL(retain, ",")
|
||||
OPT_WRITE_CHAR_P(dss, ",")
|
||||
OPT_WRITE_UNSIGNED(narenas, ",")
|
||||
OPT_WRITE_CHAR_P(percpu_arena, ",")
|
||||
OPT_WRITE_CHAR_P(metadata_thp, ",")
|
||||
OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",")
|
||||
OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",")
|
||||
OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",")
|
||||
|
@ -158,7 +158,7 @@ TEST_BEGIN(test_mallctl_opt) {
|
||||
|
||||
TEST_MALLCTL_OPT(bool, abort, always);
|
||||
TEST_MALLCTL_OPT(bool, abort_conf, always);
|
||||
TEST_MALLCTL_OPT(bool, metadata_thp, always);
|
||||
TEST_MALLCTL_OPT(const char *, metadata_thp, always);
|
||||
TEST_MALLCTL_OPT(bool, retain, always);
|
||||
TEST_MALLCTL_OPT(const char *, dss, always);
|
||||
TEST_MALLCTL_OPT(unsigned, narenas, always);
|
||||
|
Loading…
Reference in New Issue
Block a user