Change opt.metadata_thp to [disabled,auto,always].

To avoid the high RSS caused by THP + low usage arena (i.e. THP becomes a
significant percentage), added a new "auto" option which will only start using
THP after a base allocator used up the first THP region.  Starting from the
second hugepage (in a single arena), "auto" behaves the same as "always",
i.e. madvise hugepage right away.
This commit is contained in:
Qi Wang 2017-08-24 14:29:28 -07:00 committed by Qi Wang
parent ea91dfa58e
commit 47b20bb654
10 changed files with 84 additions and 25 deletions

View File

@ -919,13 +919,15 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
<varlistentry id="opt.metadata_thp">
<term>
<mallctl>opt.metadata_thp</mallctl>
(<type>bool</type>)
(<type>const char *</type>)
<literal>r-</literal>
</term>
<listitem><para>If true, allow jemalloc to use transparent huge page
(THP) for internal metadata (see <link
linkend="stats.metadata">stats.metadata</link> for details). This
option is disabled by default.</para></listitem>
<listitem><para>Controls whether to allow jemalloc to use transparent
huge page (THP) for internal metadata (see <link
linkend="stats.metadata">stats.metadata</link>). <quote>always</quote>
allows such usage. <quote>auto</quote> uses no THP initially, but may
begin to do so when metadata usage reaches certain level. The default
is <quote>disabled</quote>.</para></listitem>
</varlistentry>
<varlistentry id="opt.retain">

View File

@ -1,7 +1,8 @@
#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H
#define JEMALLOC_INTERNAL_BASE_EXTERNS_H
extern bool opt_metadata_thp;
extern metadata_thp_mode_t opt_metadata_thp;
extern const char *metadata_thp_mode_names[];
base_t *b0get(void);
base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);

View File

@ -6,4 +6,8 @@ base_ind_get(const base_t *base) {
return base->ind;
}
static inline bool
metadata_thp_enabled(void) {
return (opt_metadata_thp != metadata_thp_disabled);
}
#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */

View File

@ -4,6 +4,21 @@
typedef struct base_block_s base_block_t;
typedef struct base_s base_t;
#define METADATA_THP_DEFAULT false
#define METADATA_THP_DEFAULT metadata_thp_disabled
typedef enum {
metadata_thp_disabled = 0,
/*
* Lazily enable hugepage for metadata. To avoid high RSS caused by THP
* + low usage arena (i.e. THP becomes a significant percentage), the
* "auto" option only starts using THP after a base allocator used up
* the first THP region. Starting from the second hugepage (in a single
* arena), "auto" behaves the same as "always", i.e. madvise hugepage
* right away.
*/
metadata_thp_auto = 1,
metadata_thp_always = 2,
metadata_thp_mode_limit = 3
} metadata_thp_mode_t;
#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */

View File

@ -12,7 +12,13 @@
static base_t *b0;
bool opt_metadata_thp = METADATA_THP_DEFAULT;
metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;
const char *metadata_thp_mode_names[] = {
"disabled",
"auto",
"always"
};
/******************************************************************************/
@ -24,7 +30,7 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size)
/* We use hugepage sizes regardless of opt_metadata_thp. */
assert(size == HUGEPAGE_CEILING(size));
size_t alignment = opt_metadata_thp ? HUGEPAGE : PAGE;
size_t alignment = metadata_thp_enabled() ? HUGEPAGE : PAGE;
if (extent_hooks == &extent_hooks_default) {
addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
} else {
@ -36,12 +42,6 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size)
post_reentrancy(tsd);
}
if (addr != NULL && opt_metadata_thp && thp_state_madvise) {
assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
(size & HUGEPAGE_MASK) == 0);
pages_huge(addr, size);
}
return addr;
}
@ -101,7 +101,7 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
post_reentrancy(tsd);
}
label_done:
if (opt_metadata_thp && thp_state_madvise) {
if (metadata_thp_enabled() && thp_state_madvise) {
/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
(size & HUGEPAGE_MASK) == 0);
@ -181,8 +181,8 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent,
* On success a pointer to the initialized base_block_t header is returned.
*/
static base_block_t *
base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind,
pszind_t *pind_last, size_t *extent_sn_next, size_t size,
base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks,
unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size,
size_t alignment) {
alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
size_t usize = ALIGNMENT_CEILING(size, alignment);
@ -208,6 +208,26 @@ base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind,
if (block == NULL) {
return NULL;
}
if (metadata_thp_enabled() && thp_state_madvise) {
void *addr = (void *)block;
assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
(block_size & HUGEPAGE_MASK) == 0);
/* base == NULL indicates this is a new base. */
if (base != NULL || opt_metadata_thp == metadata_thp_always) {
/* Use hugepage for the new block. */
pages_huge(addr, block_size);
}
if (base != NULL && opt_metadata_thp == metadata_thp_auto) {
/* Make the first block THP lazily. */
base_block_t *first_block = base->blocks;
if (first_block->next == NULL) {
assert((first_block->size & HUGEPAGE_MASK) == 0);
pages_huge(first_block, first_block->size);
}
}
}
*pind_last = sz_psz2ind(block_size);
block->size = block_size;
block->next = NULL;
@ -231,7 +251,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
* called.
*/
malloc_mutex_unlock(tsdn, &base->mtx);
base_block_t *block = base_block_alloc(tsdn, extent_hooks,
base_block_t *block = base_block_alloc(tsdn, base, extent_hooks,
base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
alignment);
malloc_mutex_lock(tsdn, &base->mtx);
@ -259,7 +279,7 @@ base_t *
base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
pszind_t pind_last = 0;
size_t extent_sn_next = 0;
base_block_t *block = base_block_alloc(tsdn, extent_hooks, ind,
base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind,
&pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
if (block == NULL) {
return NULL;

View File

@ -1570,7 +1570,8 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
CTL_RO_NL_GEN(opt_metadata_thp, opt_metadata_thp, bool)
CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp],
const char *)
CTL_RO_NL_GEN(opt_retain, opt_retain, bool)
CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)

View File

@ -1055,7 +1055,23 @@ malloc_conf_init(void) {
if (opt_abort_conf && had_conf_error) {
malloc_abort_invalid_conf();
}
CONF_HANDLE_BOOL(opt_metadata_thp, "metadata_thp")
if (strncmp("metadata_thp", k, klen) == 0) {
int i;
bool match = false;
for (i = 0; i < metadata_thp_mode_limit; i++) {
if (strncmp(metadata_thp_mode_names[i],
v, vlen) == 0) {
opt_metadata_thp = i;
match = true;
break;
}
}
if (!match) {
malloc_conf_error("Invalid conf value",
k, klen, v, vlen);
}
continue;
}
CONF_HANDLE_BOOL(opt_retain, "retain")
if (strncmp("dss", k, klen) == 0) {
int i;

View File

@ -418,7 +418,7 @@ os_overcommits_proc(void) {
static void
init_thp_state(void) {
if (!have_madvise_huge) {
if (opt_metadata_thp && opt_abort) {
if (metadata_thp_enabled() && opt_abort) {
malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
abort();
}

View File

@ -802,11 +802,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
}
OPT_WRITE_BOOL(abort, ",")
OPT_WRITE_BOOL(abort_conf, ",")
OPT_WRITE_BOOL(metadata_thp, ",")
OPT_WRITE_BOOL(retain, ",")
OPT_WRITE_CHAR_P(dss, ",")
OPT_WRITE_UNSIGNED(narenas, ",")
OPT_WRITE_CHAR_P(percpu_arena, ",")
OPT_WRITE_CHAR_P(metadata_thp, ",")
OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",")
OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",")
OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",")

View File

@ -158,7 +158,7 @@ TEST_BEGIN(test_mallctl_opt) {
TEST_MALLCTL_OPT(bool, abort, always);
TEST_MALLCTL_OPT(bool, abort_conf, always);
TEST_MALLCTL_OPT(bool, metadata_thp, always);
TEST_MALLCTL_OPT(const char *, metadata_thp, always);
TEST_MALLCTL_OPT(bool, retain, always);
TEST_MALLCTL_OPT(const char *, dss, always);
TEST_MALLCTL_OPT(unsigned, narenas, always);