From e98a620c59ac20b13e2de796164cc67f050ed2bf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Nov 2016 13:36:17 -0800 Subject: [PATCH] Mark partially purged arena chunks as non-hugepage. Add the pages_[no]huge() functions, which toggle huge page state via madvise(..., MADV_[NO]HUGEPAGE) calls. The first time a page run is purged from within an arena chunk, call pages_nohuge() to tell the kernel to make no further attempts to back the chunk with huge pages. Upon arena chunk deletion, restore the associated virtual memory to its original state via pages_huge(). This resolves #243. --- Makefile.in | 1 + configure.ac | 14 +++++++-- include/jemalloc/internal/arena.h | 8 +++++ .../internal/jemalloc_internal_defs.h.in | 6 ++++ include/jemalloc/internal/pages.h | 2 ++ include/jemalloc/internal/private_symbols.txt | 2 ++ src/arena.c | 24 +++++++++++++- src/pages.c | 31 ++++++++++++++++++- test/unit/pages.c | 27 ++++++++++++++++ 9 files changed, 110 insertions(+), 5 deletions(-) create mode 100644 test/unit/pages.c diff --git a/Makefile.in b/Makefile.in index 836d4e9c..c7053639 100644 --- a/Makefile.in +++ b/Makefile.in @@ -167,6 +167,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/pack.c \ + $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ diff --git a/configure.ac b/configure.ac index 197414c5..3fdd1b93 100644 --- a/configure.ac +++ b/configure.ac @@ -1612,6 +1612,8 @@ JE_COMPILABLE([madvise(2)], [ madvise((void *)0, 0, 0); ], [je_cv_madvise]) if test "x${je_cv_madvise}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + dnl Check for madvise(..., MADV_FREE). JE_COMPILABLE([madvise(..., MADV_FREE)], [ #include @@ -1632,9 +1634,15 @@ if test "x${je_cv_madvise}" = "xyes" ; then AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) fi - if test "x${je_cv_madv_free}" = "xyes" \ - -o "x${je_cv_madv_dontneed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + dnl Check for madvise(..., MADV_[NO]HUGEPAGE). + JE_COMPILABLE([madvise(..., MADV_[[NO]]HUGEPAGE)], [ +#include +], [ + madvise((void *)0, 0, MADV_HUGEPAGE); + madvise((void *)0, 0, MADV_NOHUGEPAGE); +], [je_cv_thp]) + if test "x${je_cv_thp}" = "xyes" ; then + AC_DEFINE([JEMALLOC_THP], [ ]) fi fi diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 30e2bdd6..ce4e6029 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -190,6 +190,14 @@ struct arena_chunk_s { */ extent_node_t node; + /* + * True if memory could be backed by transparent huge pages. This is + * only directly relevant to Linux, since it is the only supported + * platform on which jemalloc interacts with explicit transparent huge + * page controls. + */ + bool hugepage; + /* * Map of pages within chunk that keeps track of free/large/small. The * first map_bias entries are omitted, since the chunk header does not diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index d530119b..d7f3ef1c 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -265,6 +265,12 @@ #undef JEMALLOC_PURGE_MADVISE_FREE #undef JEMALLOC_PURGE_MADVISE_DONTNEED +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#undef JEMALLOC_THP + /* Define if operating system has alloca.h header. */ #undef JEMALLOC_HAS_ALLOCA_H diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index e21effd1..4ae9f156 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -16,6 +16,8 @@ void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, bool pages_commit(void *addr, size_t size); bool pages_decommit(void *addr, size_t size); bool pages_purge(void *addr, size_t size); +bool pages_huge(void *addr, size_t size); +bool pages_nohuge(void *addr, size_t size); void pages_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 71bfb94d..c1c6c409 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -397,7 +397,9 @@ p2rz pages_boot pages_commit pages_decommit +pages_huge pages_map +pages_nohuge pages_purge pages_trim pages_unmap diff --git a/src/arena.c b/src/arena.c index 87eead81..648a8da3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -664,6 +664,8 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) if (chunk == NULL) return (NULL); + chunk->hugepage = true; + /* * Initialize the map to contain one maximal free untouched run. Mark * the pages as zeroed if arena_chunk_alloc_internal() returned a zeroed @@ -727,13 +729,14 @@ arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) static void arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) { - size_t sn; + size_t sn, hugepage; bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk_deregister(chunk, &chunk->node); sn = extent_node_sn_get(&chunk->node); + hugepage = chunk->hugepage; committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); if (!committed) { /* @@ -746,6 +749,14 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } + if (!hugepage) { + /* + * Convert chunk back to the default state, so that all + * subsequent chunk allocations start out with chunks that can + * be backed by transparent huge pages. + */ + pages_huge(chunk, chunksize); + } chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, sn, committed); @@ -1682,6 +1693,17 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, run_size = arena_mapbits_large_size_get(chunk, pageind); npages = run_size >> LG_PAGE; + /* + * If this is the first run purged within chunk, mark + * the chunk as non-huge. This will prevent all use of + * transparent huge pages for this chunk until the chunk + * as a whole is deallocated. + */ + if (chunk->hugepage) { + pages_nohuge(chunk, chunksize); + chunk->hugepage = false; + } + assert(pageind + npages <= chunk_npages); assert(!arena_mapbits_decommitted_get(chunk, pageind)); assert(!arena_mapbits_decommitted_get(chunk, diff --git a/src/pages.c b/src/pages.c index 395ace99..a56d10b7 100644 --- a/src/pages.c +++ b/src/pages.c @@ -170,7 +170,8 @@ pages_purge(void *addr, size_t size) #ifdef _WIN32 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); unzeroed = true; -#elif defined(JEMALLOC_HAVE_MADVISE) +#elif (defined(JEMALLOC_PURGE_MADVISE_FREE) || \ + defined(JEMALLOC_PURGE_MADVISE_FREE)) # if defined(JEMALLOC_PURGE_MADVISE_FREE) # define JEMALLOC_MADV_PURGE MADV_FREE # define JEMALLOC_MADV_ZEROS false @@ -191,6 +192,34 @@ pages_purge(void *addr, size_t size) return (unzeroed); } +bool +pages_huge(void *addr, size_t size) +{ + + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + +#ifdef JEMALLOC_THP + return (madvise(addr, size, MADV_HUGEPAGE) != 0); +#else + return (false); +#endif +} + +bool +pages_nohuge(void *addr, size_t size) +{ + + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + +#ifdef JEMALLOC_THP + return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); +#else + return (false); +#endif +} + #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT static bool os_overcommits_sysctl(void) diff --git a/test/unit/pages.c b/test/unit/pages.c new file mode 100644 index 00000000..d31a35e6 --- /dev/null +++ b/test/unit/pages.c @@ -0,0 +1,27 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_pages_huge) +{ + bool commit; + void *pages; + + commit = true; + pages = pages_map(NULL, PAGE, &commit); + assert_ptr_not_null(pages, "Unexpected pages_map() error"); + + assert_false(pages_huge(pages, PAGE), + "Unexpected pages_huge() result"); + assert_false(pages_nohuge(pages, PAGE), + "Unexpected pages_nohuge() result"); + + pages_unmap(pages, PAGE); +} +TEST_END + +int +main(void) +{ + + return (test( + test_pages_huge)); +}