diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL index 8d94e179..66c0c747 100644 --- a/jemalloc/INSTALL +++ b/jemalloc/INSTALL @@ -56,6 +56,11 @@ any of the following arguments (not a definitive list) to 'configure': cached and released in bulk, thus reducing the total number of mutex operations. Use the 'H' and 'G' options to control thread-specific caching. +--enable-swap + Enable mmap()ed swap file support. When this feature is built in, it is + possible to specify one or more files that act as backing store. This + effectively allows for per application swap files. + --enable-dss Enable support for page allocation/deallocation via sbrk(2), in addition to mmap(2). diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in index 0e6f9290..92b8151b 100644 --- a/jemalloc/Makefile.in +++ b/jemalloc/Makefile.in @@ -39,9 +39,11 @@ CHDRS := @objroot@src/jemalloc@install_suffix@.h \ @objroot@src/jemalloc_defs@install_suffix@.h CSRCS := @srcroot@src/jemalloc.c @srcroot@src/jemalloc_arena.c \ @srcroot@src/jemalloc_base.c @srcroot@src/jemalloc_chunk.c \ - @srcroot@src/jemalloc_extent.c @srcroot@src/jemalloc_huge.c \ - @srcroot@src/jemalloc_mutex.c @srcroot@src/jemalloc_stats.c \ - @srcroot@src/jemalloc_tcache.c @srcroot@src/jemalloc_trace.c + @srcroot@src/jemalloc_chunk_dss.c @srcroot@src/jemalloc_chunk_mmap.c \ + @srcroot@src/jemalloc_chunk_swap.c @srcroot@src/jemalloc_extent.c \ + @srcroot@src/jemalloc_huge.c @srcroot@src/jemalloc_mutex.c \ + @srcroot@src/jemalloc_stats.c @srcroot@src/jemalloc_tcache.c \ + @srcroot@src/jemalloc_trace.c DSOS := @objroot@lib/libjemalloc@install_suffix@.so.$(REV) \ @objroot@lib/libjemalloc@install_suffix@.so \ @objroot@lib/libjemalloc@install_suffix@_pic.a diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac index f6226824..ee9cd8ea 100644 --- a/jemalloc/configure.ac +++ b/jemalloc/configure.ac @@ -400,6 +400,28 @@ fi ) dnl Finish tcache-related definitions below, once TLS configuration is done. +dnl Do not enable mmap()ped swap files by default. +AC_ARG_ENABLE([swap], + [AS_HELP_STRING([--enable-swap], [Enable mmap()ped swap files])], +[if test "x$enable_swap" = "xno" ; then + enable_swap="0" +else + enable_swap="1" +fi +], +[enable_swap="0"] +) +if test "x$enable_swap" = "x1" ; then + AC_DEFINE([JEMALLOC_SWAP], [ ]) +fi +AC_SUBST([enable_swap]) +if test "x$enable_swap" = "x0" ; then + roff_swap=".\\\" " +else + roff_swap="" +fi +AC_SUBST([roff_swap]) + dnl Do not enable allocation from DSS by default. 
AC_ARG_ENABLE([dss], [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])], @@ -688,6 +710,7 @@ AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([sysv : ${enable_sysv}]) +AC_MSG_RESULT([swap : ${enable_swap}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in index bf14644d..46feab43 100644 --- a/jemalloc/doc/jemalloc.3.in +++ b/jemalloc/doc/jemalloc.3.in @@ -38,12 +38,19 @@ .\" @(#)malloc.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD: head/lib/libc/stdlib/malloc.3 182225 2008-08-27 02:00:53Z jasone $ .\" -.Dd January 17, 2010 +.Dd January 23, 2010 .Dt JEMALLOC 3 .Os .Sh NAME -@roff_tcache@.Nm @jemalloc_prefix@malloc , @jemalloc_prefix@calloc , @jemalloc_prefix@posix_memalign , @jemalloc_prefix@realloc , @jemalloc_prefix@free , @jemalloc_prefix@malloc_usable_size , @jemalloc_prefix@malloc_tcache_flush , @jemalloc_prefix@malloc_stats_print -@roff_no_tcache@.Nm @jemalloc_prefix@malloc , @jemalloc_prefix@calloc , @jemalloc_prefix@posix_memalign , @jemalloc_prefix@realloc , @jemalloc_prefix@free , @jemalloc_prefix@malloc_usable_size , @jemalloc_prefix@malloc_stats_print +.Nm @jemalloc_prefix@malloc , +.Nm @jemalloc_prefix@calloc , +.Nm @jemalloc_prefix@posix_memalign , +.Nm @jemalloc_prefix@realloc , +.Nm @jemalloc_prefix@free , +.Nm @jemalloc_prefix@malloc_usable_size , +@roff_swap@.Nm @jemalloc_prefix@malloc_swap_enable , +@roff_tcache@.Nm @jemalloc_prefix@malloc_tcache_flush , +.Nm @jemalloc_prefix@malloc_stats_print .Nd general purpose memory allocation functions .Sh LIBRARY .Lb libjemalloc@install_suffix@ @@ -62,6 +69,8 @@ .Fn @jemalloc_prefix@free "void *ptr" .Ft size_t .Fn @jemalloc_prefix@malloc_usable_size "const void *ptr" +@roff_swap@.Ft int +@roff_swap@.Fn @jemalloc_prefix@malloc_swap_enable "const int *fds" "unsigned nfds" "int prezeroed" @roff_tcache@.Ft void @roff_tcache@.Fn @jemalloc_prefix@malloc_tcache_flush "void" .Ft void @@ -164,6 +173,27 @@ Any discrepancy between the requested allocation size and the size reported by .Fn @jemalloc_prefix@malloc_usable_size should not be depended on, since such behavior is entirely implementation-dependent. +@roff_swap@.Pp +@roff_swap@The +@roff_swap@.Fn @jemalloc_prefix@malloc_swap_enable +@roff_swap@function opens and contiguously maps a list of +@roff_swap@.Fa nfds +@roff_swap@file descriptors pointed to by +@roff_swap@.Fa fds +@roff_swap@via +@roff_swap@.Xr mmap 2 . +@roff_swap@The resulting virtual memory region is preferred over anonymous +@roff_swap@.Xr mmap 2 +@roff_swap@@roff_dss@and +@roff_swap@@roff_dss@.Xr sbrk 2 +@roff_swap@memory. +@roff_swap@Note that if a file's size is not a multiple of the page size, it is +@roff_swap@automatically truncated to the nearest page size multiple. +@roff_swap@If +@roff_swap@.Fa prezeroed +@roff_swap@is non-zero, the allocator assumes that the file(s) contain nothing +@roff_swap@but nil bytes. +@roff_swap@If this assumption is violated, allocator behavior is undefined. @roff_tcache@.Pp @roff_tcache@The @roff_tcache@.Fn @jemalloc_prefix@malloc_tcache_flush @@ -310,6 +340,16 @@ The default number of arenas is @roff_tcache@two @roff_no_tcache@four times the number of CPUs, or one if there is a single CPU. 
+@roff_swap@.It O +@roff_swap@Over-commit memory as a side effect of using anonymous +@roff_swap@.Xr mmap 2 +@roff_swap@@roff_dss@ and +@roff_swap@@roff_dss@.Xr sbrk 2 +@roff_swap@for virtual memory allocation. +@roff_swap@In order for overcommit to be disabled, the +@roff_swap@.Fn malloc_swap_enable +@roff_swap@function must have been successfully called. +@roff_swap@This option is enabled by default. .It P The .Fn malloc_stats_print @@ -606,6 +646,11 @@ The .Fn @jemalloc_prefix@malloc_usable_size function returns the usable size of the allocation pointed to by .Fa ptr . +@roff_swap@.Pp +@roff_swap@The +@roff_swap@.Fn @jemalloc_prefix@malloc_swap_enable +@roff_swap@function returns the value 0 if successful; otherwise it returns a +@roff_swap@non-zero value. .Sh ENVIRONMENT The following environment variables affect the execution of the allocation functions: @@ -623,10 +668,9 @@ To dump core whenever a problem occurs: ln -s 'A' /etc/jemalloc.conf .Ed .Pp -To specify in the source that a program does no return value checking -on calls to these functions: +To specify in the source a chunk size that is twice the default: .Bd -literal -offset indent -@jemalloc_prefix@malloc_options = "X"; +@jemalloc_prefix@malloc_options = "K"; .Ed .Sh SEE ALSO .Xr mtrgraph 1 , @@ -634,7 +678,7 @@ on calls to these functions: .Xr jemtr2mtr 1 , .Xr madvise 2 , .Xr mmap 2 , -.Xr sbrk 2 , +@roff_dss@.Xr sbrk 2 , .Xr alloca 3 , .Xr atexit 3 , .Xr getpagesize 3 diff --git a/jemalloc/src/internal/jemalloc_base.h b/jemalloc/src/internal/jemalloc_base.h index ef650dd3..e353f309 100644 --- a/jemalloc/src/internal/jemalloc_base.h +++ b/jemalloc/src/internal/jemalloc_base.h @@ -9,9 +9,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -#ifdef JEMALLOC_STATS -extern size_t base_mapped; -#endif extern malloc_mutex_t base_mtx; void *base_alloc(size_t size); diff --git a/jemalloc/src/internal/jemalloc_chunk.h b/jemalloc/src/internal/jemalloc_chunk.h index b2e24fff..74401684 100644 --- a/jemalloc/src/internal/jemalloc_chunk.h +++ b/jemalloc/src/internal/jemalloc_chunk.h @@ -27,33 +27,22 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +extern size_t opt_lg_chunk; +#ifdef JEMALLOC_SWAP +extern bool opt_overcommit; +#endif + #ifdef JEMALLOC_STATS /* Chunk statistics. */ extern chunk_stats_t stats_chunks; #endif -extern size_t opt_lg_chunk; extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; extern size_t arena_chunk_header_npages; extern size_t arena_maxclass; /* Max size class for arenas. */ -#ifdef JEMALLOC_DSS -/* - * Protects sbrk() calls. This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. - */ -extern malloc_mutex_t dss_mtx; -/* Base address of the DSS. */ -extern void *dss_base; -/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ -extern void *dss_prev; -/* Current upper limit on DSS addresses. 
*/ -extern void *dss_max; -#endif - -void *pages_map(void *addr, size_t size); void *chunk_alloc(size_t size, bool zero); void chunk_dealloc(void *chunk, size_t size); bool chunk_boot(void); @@ -64,3 +53,7 @@ bool chunk_boot(void); #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ + +#include "internal/jemalloc_chunk_swap.h" +#include "internal/jemalloc_chunk_dss.h" +#include "internal/jemalloc_chunk_mmap.h" diff --git a/jemalloc/src/internal/jemalloc_chunk_dss.h b/jemalloc/src/internal/jemalloc_chunk_dss.h new file mode 100644 index 00000000..dc7b38ea --- /dev/null +++ b/jemalloc/src/internal/jemalloc_chunk_dss.h @@ -0,0 +1,29 @@ +#ifdef JEMALLOC_DSS +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +/* + * Protects sbrk() calls. This avoids malloc races among threads, though it + * does not protect against races with threads that call sbrk() directly. + */ +extern malloc_mutex_t dss_mtx; + +void *chunk_alloc_dss(size_t size, bool zero); +bool chunk_dealloc_dss(void *chunk, size_t size); +bool chunk_dss_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +#endif /* JEMALLOC_DSS */ diff --git a/jemalloc/src/internal/jemalloc_chunk_mmap.h b/jemalloc/src/internal/jemalloc_chunk_mmap.h new file mode 100644 index 00000000..8fb90b77 --- /dev/null +++ b/jemalloc/src/internal/jemalloc_chunk_mmap.h @@ -0,0 +1,20 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *chunk_alloc_mmap(size_t size); +void chunk_dealloc_mmap(void *chunk, size_t size); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/jemalloc/src/internal/jemalloc_chunk_swap.h b/jemalloc/src/internal/jemalloc_chunk_swap.h new file mode 100644 index 00000000..3d5c5d21 --- /dev/null +++ b/jemalloc/src/internal/jemalloc_chunk_swap.h @@ -0,0 +1,30 @@ +#ifdef JEMALLOC_SWAP +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern malloc_mutex_t swap_mtx; +extern bool swap_enabled; +#ifdef JEMALLOC_STATS +extern size_t swap_avail; +#endif + +void *chunk_alloc_swap(size_t size, bool zero); +bool chunk_dealloc_swap(void 
*chunk, size_t size); +bool chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed); +bool chunk_swap_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +#endif /* JEMALLOC_SWAP */ diff --git a/jemalloc/src/internal/jemalloc_extent.h b/jemalloc/src/internal/jemalloc_extent.h index 0ce27c77..cb37dd25 100644 --- a/jemalloc/src/internal/jemalloc_extent.h +++ b/jemalloc/src/internal/jemalloc_extent.h @@ -9,7 +9,7 @@ typedef struct extent_node_s extent_node_t; /* Tree of extents. */ struct extent_node_s { -#ifdef JEMALLOC_DSS +#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) /* Linkage for the size/address-ordered tree. */ rb_node(extent_node_t) link_szad; #endif @@ -29,7 +29,7 @@ typedef rb_tree(extent_node_t) extent_tree_t; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -#ifdef JEMALLOC_DSS +#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) #endif diff --git a/jemalloc/src/internal/jemalloc_stats.h b/jemalloc/src/internal/jemalloc_stats.h index 6985416e..4a3a8811 100644 --- a/jemalloc/src/internal/jemalloc_stats.h +++ b/jemalloc/src/internal/jemalloc_stats.h @@ -135,6 +135,8 @@ void malloc_cprintf(void (*write4)(void *, const char *, const char *, void malloc_printf(const char *format, ...) JEMALLOC_ATTR(format(printf, 1, 2)); #endif +void stats_print(void (*write4)(void *, const char *, const char *, + const char *, const char *), void *w4opaque, const char *opts); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c index 6db5b014..a6e97935 100644 --- a/jemalloc/src/jemalloc.c +++ b/jemalloc/src/jemalloc.c @@ -632,6 +632,14 @@ MALLOC_OUT: case 'N': opt_narenas_lshift++; break; +#ifdef JEMALLOC_SWAP + case 'o': + opt_overcommit = false; + break; + case 'O': + opt_overcommit = true; + break; +#endif case 'p': opt_stats_print = false; break; @@ -1197,6 +1205,23 @@ JEMALLOC_P(malloc_usable_size)(const void *ptr) return (ret); } +#ifdef JEMALLOC_SWAP +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(malloc_swap_enable)(const int *fds, unsigned nfds, int prezeroed) +{ + + /* + * Make sure malloc is initialized, because we need page size, chunk + * size, etc. + */ + if (malloc_init()) + return (-1); + + return (chunk_swap_enable(fds, nfds, (prezeroed != 0)) ? -1 : 0); +} +#endif + #ifdef JEMALLOC_TCACHE JEMALLOC_ATTR(visibility("default")) void @@ -1213,6 +1238,15 @@ JEMALLOC_P(malloc_tcache_flush)(void) } #endif +JEMALLOC_ATTR(visibility("default")) +void +JEMALLOC_P(malloc_stats_print)(void (*write4)(void *, const char *, + const char *, const char *, const char *), void *w4opaque, const char *opts) +{ + + stats_print(write4, w4opaque, opts); +} + /* * End non-standard functions. */ @@ -1271,6 +1305,10 @@ jemalloc_prefork(void) #ifdef JEMALLOC_DSS malloc_mutex_lock(&dss_mtx); #endif + +#ifdef JEMALLOC_SWAP + malloc_mutex_lock(&swap_mtx); +#endif } static void @@ -1281,6 +1319,10 @@ jemalloc_postfork(void) /* Release all mutexes, now that fork() has completed. 
*/ +#ifdef JEMALLOC_SWAP + malloc_mutex_unlock(&swap_mtx); +#endif + #ifdef JEMALLOC_DSS malloc_mutex_unlock(&dss_mtx); #endif diff --git a/jemalloc/src/jemalloc.h.in b/jemalloc/src/jemalloc.h.in index 507500cd..dee8d823 100644 --- a/jemalloc/src/jemalloc.h.in +++ b/jemalloc/src/jemalloc.h.in @@ -21,6 +21,10 @@ void *JEMALLOC_P(realloc)(void *ptr, size_t size); void JEMALLOC_P(free)(void *ptr); size_t JEMALLOC_P(malloc_usable_size)(const void *ptr); +#ifdef JEMALLOC_SWAP +int JEMALLOC_P(malloc_swap_enable)(const int *fds, unsigned nfds, + int prezeroed); +#endif #ifdef JEMALLOC_TCACHE void JEMALLOC_P(malloc_tcache_flush)(void); #endif diff --git a/jemalloc/src/jemalloc_arena.c b/jemalloc/src/jemalloc_arena.c index cad84686..3818f9ce 100644 --- a/jemalloc/src/jemalloc_arena.c +++ b/jemalloc/src/jemalloc_arena.c @@ -1570,6 +1570,7 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, *nactive += arena->nactive; *ndirty += arena->ndirty; + astats->mapped += arena->stats.mapped; astats->npurge += arena->stats.npurge; astats->nmadvise += arena->stats.nmadvise; astats->purged += arena->stats.purged; diff --git a/jemalloc/src/jemalloc_base.c b/jemalloc/src/jemalloc_base.c index af83cec5..bb88f7cf 100644 --- a/jemalloc/src/jemalloc_base.c +++ b/jemalloc/src/jemalloc_base.c @@ -1,9 +1,8 @@ #define JEMALLOC_BASE_C_ #include "internal/jemalloc_internal.h" -#ifdef JEMALLOC_STATS -size_t base_mapped; -#endif +/******************************************************************************/ +/* Data. */ malloc_mutex_t base_mtx; @@ -17,97 +16,27 @@ static void *base_next_addr; static void *base_past_addr; /* Addr immediately past base_pages. */ static extent_node_t *base_nodes; -#ifdef JEMALLOC_DSS -static bool base_pages_alloc_dss(size_t minsize); -#endif -static bool base_pages_alloc_mmap(size_t minsize); +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + static bool base_pages_alloc(size_t minsize); -#ifdef JEMALLOC_DSS -static bool -base_pages_alloc_dss(size_t minsize) -{ - - /* - * Do special DSS allocation here, since base allocations don't need to - * be chunk-aligned. - */ - malloc_mutex_lock(&dss_mtx); - if (dss_prev != (void *)-1) { - intptr_t incr; - size_t csize = CHUNK_CEILING(minsize); - - do { - /* Get the current end of the DSS. */ - dss_max = sbrk(0); - - /* - * Calculate how much padding is necessary to - * chunk-align the end of the DSS. Don't worry about - * dss_max not being chunk-aligned though. - */ - incr = (intptr_t)chunksize - - (intptr_t)CHUNK_ADDR2OFFSET(dss_max); - assert(incr >= 0); - if ((size_t)incr < minsize) - incr += csize; - - dss_prev = sbrk(incr); - if (dss_prev == dss_max) { - /* Success. 
*/ - dss_max = (void *)((intptr_t)dss_prev + incr); - base_pages = dss_prev; - base_next_addr = base_pages; - base_past_addr = dss_max; -#ifdef JEMALLOC_STATS - base_mapped += incr; -#endif - malloc_mutex_unlock(&dss_mtx); - return (false); - } - } while (dss_prev != (void *)-1); - } - malloc_mutex_unlock(&dss_mtx); - - return (true); -} -#endif - -static bool -base_pages_alloc_mmap(size_t minsize) -{ - size_t csize; - - assert(minsize != 0); - csize = PAGE_CEILING(minsize); - base_pages = pages_map(NULL, csize); - if (base_pages == NULL) - return (true); - base_next_addr = base_pages; - base_past_addr = (void *)((uintptr_t)base_pages + csize); -#ifdef JEMALLOC_STATS - base_mapped += csize; -#endif - - return (false); -} +/******************************************************************************/ static bool base_pages_alloc(size_t minsize) { + size_t csize; -#ifdef JEMALLOC_DSS - if (base_pages_alloc_dss(minsize) == false) - return (false); + assert(minsize != 0); + csize = CHUNK_CEILING(minsize); + base_pages = chunk_alloc(csize, false); + if (base_pages == NULL) + return (true); + base_next_addr = base_pages; + base_past_addr = (void *)((uintptr_t)base_pages + csize); - if (minsize != 0) -#endif - { - if (base_pages_alloc_mmap(minsize) == false) - return (false); - } - - return (true); + return (false); } void * @@ -167,17 +96,6 @@ bool base_boot(void) { -#ifdef JEMALLOC_STATS - base_mapped = 0; -#endif -#ifdef JEMALLOC_DSS - /* - * Allocate a base chunk here, since it doesn't actually have to be - * chunk-aligned. Doing this before allocating any other chunks allows - * the use of space that would otherwise be wasted. - */ - base_pages_alloc(0); -#endif base_nodes = NULL; if (malloc_mutex_init(&base_mtx)) return (true); diff --git a/jemalloc/src/jemalloc_chunk.c b/jemalloc/src/jemalloc_chunk.c index 9cd0c211..2da85a6c 100644 --- a/jemalloc/src/jemalloc_chunk.c +++ b/jemalloc/src/jemalloc_chunk.c @@ -5,6 +5,9 @@ /* Data. */ size_t opt_lg_chunk = LG_CHUNK_DEFAULT; +#ifdef JEMALLOC_SWAP +bool opt_overcommit = true; +#endif #ifdef JEMALLOC_STATS chunk_stats_t stats_chunks; @@ -17,309 +20,7 @@ size_t chunk_npages; size_t arena_chunk_header_npages; size_t arena_maxclass; /* Max size class for arenas. */ -#ifdef JEMALLOC_DSS -malloc_mutex_t dss_mtx; -void *dss_base; -void *dss_prev; -void *dss_max; - -/* - * Trees of chunks that were previously allocated (trees differ only in node - * ordering). These are used when allocating chunks, in an attempt to re-use - * address space. Depending on function, different tree orderings are needed, - * which is why there are two trees with the same contents. - */ -static extent_tree_t dss_chunks_szad; -static extent_tree_t dss_chunks_ad; -#endif - -/* - * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and - * potentially avoid some system calls. We can get away without TLS here, - * since the state of mmap_unaligned only affects performance, rather than - * correct function. - */ -static -#ifndef NO_TLS - __thread -#endif - bool mmap_unaligned -#ifndef NO_TLS - JEMALLOC_ATTR(tls_model("initial-exec")) -#endif - ; /******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static void pages_unmap(void *addr, size_t size); -#ifdef JEMALLOC_DSS -static void *chunk_alloc_dss(size_t size); -static void *chunk_recycle_dss(size_t size, bool zero); -#endif -static void *chunk_alloc_mmap_slow(size_t size, bool unaligned); -static void *chunk_alloc_mmap(size_t size); -#ifdef JEMALLOC_DSS -static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size); -static bool chunk_dealloc_dss(void *chunk, size_t size); -#endif -static void chunk_dealloc_mmap(void *chunk, size_t size); - -/******************************************************************************/ - -void * -pages_map(void *addr, size_t size) -{ - void *ret; - - /* - * We don't use MAP_FIXED here, because it can cause the *replacement* - * of existing mappings, and we only want to create new mappings. - */ - ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, - -1, 0); - assert(ret != NULL); - - if (ret == MAP_FAILED) - ret = NULL; - else if (addr != NULL && ret != addr) { - /* - * We succeeded in mapping memory, but not in the right place. - */ - if (munmap(ret, size) == -1) { - char buf[STRERROR_BUF]; - - strerror_r(errno, buf, sizeof(buf)); - malloc_write4("", ": Error in munmap(): ", - buf, "\n"); - if (opt_abort) - abort(); - } - ret = NULL; - } - - assert(ret == NULL || (addr == NULL && ret != addr) - || (addr != NULL && ret == addr)); - return (ret); -} - -static void -pages_unmap(void *addr, size_t size) -{ - - if (munmap(addr, size) == -1) { - char buf[STRERROR_BUF]; - - strerror_r(errno, buf, sizeof(buf)); - malloc_write4("", ": Error in munmap(): ", buf, "\n"); - if (opt_abort) - abort(); - } -} - -#ifdef JEMALLOC_DSS -static void * -chunk_alloc_dss(size_t size) -{ - - /* - * sbrk() uses a signed increment argument, so take care not to - * interpret a huge allocation request as a negative increment. - */ - if ((intptr_t)size < 0) - return (NULL); - - malloc_mutex_lock(&dss_mtx); - if (dss_prev != (void *)-1) { - intptr_t incr; - - /* - * The loop is necessary to recover from races with other - * threads that are using the DSS for something other than - * malloc. - */ - do { - void *ret; - - /* Get the current end of the DSS. */ - dss_max = sbrk(0); - - /* - * Calculate how much padding is necessary to - * chunk-align the end of the DSS. - */ - incr = (intptr_t)size - - (intptr_t)CHUNK_ADDR2OFFSET(dss_max); - if (incr == (intptr_t)size) - ret = dss_max; - else { - ret = (void *)((intptr_t)dss_max + incr); - incr += size; - } - - dss_prev = sbrk(incr); - if (dss_prev == dss_max) { - /* Success. */ - dss_max = (void *)((intptr_t)dss_prev + incr); - malloc_mutex_unlock(&dss_mtx); - return (ret); - } - } while (dss_prev != (void *)-1); - } - malloc_mutex_unlock(&dss_mtx); - - return (NULL); -} - -static void * -chunk_recycle_dss(size_t size, bool zero) -{ - extent_node_t *node, key; - - key.addr = NULL; - key.size = size; - malloc_mutex_lock(&dss_mtx); - node = extent_tree_szad_nsearch(&dss_chunks_szad, &key); - if (node != NULL) { - void *ret = node->addr; - - /* Remove node from the tree. */ - extent_tree_szad_remove(&dss_chunks_szad, node); - if (node->size == size) { - extent_tree_ad_remove(&dss_chunks_ad, node); - base_node_dealloc(node); - } else { - /* - * Insert the remainder of node's address range as a - * smaller chunk. Its position within dss_chunks_ad - * does not change. 
- */ - assert(node->size > size); - node->addr = (void *)((uintptr_t)node->addr + size); - node->size -= size; - extent_tree_szad_insert(&dss_chunks_szad, node); - } - malloc_mutex_unlock(&dss_mtx); - - if (zero) - memset(ret, 0, size); - return (ret); - } - malloc_mutex_unlock(&dss_mtx); - - return (NULL); -} -#endif - -static void * -chunk_alloc_mmap_slow(size_t size, bool unaligned) -{ - void *ret; - size_t offset; - - /* Beware size_t wrap-around. */ - if (size + chunksize <= size) - return (NULL); - - ret = pages_map(NULL, size + chunksize); - if (ret == NULL) - return (NULL); - - /* Clean up unneeded leading/trailing space. */ - offset = CHUNK_ADDR2OFFSET(ret); - if (offset != 0) { - /* Note that mmap() returned an unaligned mapping. */ - unaligned = true; - - /* Leading space. */ - pages_unmap(ret, chunksize - offset); - - ret = (void *)((uintptr_t)ret + - (chunksize - offset)); - - /* Trailing space. */ - pages_unmap((void *)((uintptr_t)ret + size), - offset); - } else { - /* Trailing space only. */ - pages_unmap((void *)((uintptr_t)ret + size), - chunksize); - } - - /* - * If mmap() returned an aligned mapping, reset mmap_unaligned so that - * the next chunk_alloc_mmap() execution tries the fast allocation - * method. - */ - if (unaligned == false) - mmap_unaligned = false; - - return (ret); -} - -static void * -chunk_alloc_mmap(size_t size) -{ - void *ret; - - /* - * Ideally, there would be a way to specify alignment to mmap() (like - * NetBSD has), but in the absence of such a feature, we have to work - * hard to efficiently create aligned mappings. The reliable, but - * slow method is to create a mapping that is over-sized, then trim the - * excess. However, that always results in at least one call to - * pages_unmap(). - * - * A more optimistic approach is to try mapping precisely the right - * amount, then try to append another mapping if alignment is off. In - * practice, this works out well as long as the application is not - * interleaving mappings via direct mmap() calls. If we do run into a - * situation where there is an interleaved mapping and we are unable to - * extend an unaligned mapping, our best option is to switch to the - * slow method until mmap() returns another aligned mapping. This will - * tend to leave a gap in the memory map that is too small to cause - * later problems for the optimistic method. - * - * Another possible confounding factor is address space layout - * randomization (ASLR), which causes mmap(2) to disregard the - * requested address. mmap_unaligned tracks whether the previous - * chunk_alloc_mmap() execution received any unaligned or relocated - * mappings, and if so, the current execution will immediately fall - * back to the slow method. However, we keep track of whether the fast - * method would have succeeded, and if so, we make a note to try the - * fast method next time. - */ - - if (mmap_unaligned == false) { - size_t offset; - - ret = pages_map(NULL, size); - if (ret == NULL) - return (NULL); - - offset = CHUNK_ADDR2OFFSET(ret); - if (offset != 0) { - mmap_unaligned = true; - /* Try to extend chunk boundary. */ - if (pages_map((void *)((uintptr_t)ret + size), - chunksize - offset) == NULL) { - /* - * Extension failed. Clean up, then revert to - * the reliable-but-expensive method. - */ - pages_unmap(ret, size); - ret = chunk_alloc_mmap_slow(size, true); - } else { - /* Clean up unneeded leading space. 
*/ - pages_unmap(ret, chunksize - offset); - ret = (void *)((uintptr_t)ret + (chunksize - - offset)); - } - } - } - ret = chunk_alloc_mmap_slow(size, false); - - return (ret); -} void * chunk_alloc(size_t size, bool zero) @@ -329,20 +30,26 @@ chunk_alloc(size_t size, bool zero) assert(size != 0); assert((size & chunksize_mask) == 0); -#ifdef JEMALLOC_DSS - ret = chunk_recycle_dss(size, zero); - if (ret != NULL) { - goto RETURN; +#ifdef JEMALLOC_SWAP + if (swap_enabled) { + ret = chunk_alloc_swap(size, zero); + if (ret != NULL) + goto RETURN; } - ret = chunk_alloc_dss(size); - if (ret != NULL) - goto RETURN; - + if (swap_enabled == false || opt_overcommit) { +#endif +#ifdef JEMALLOC_DSS + ret = chunk_alloc_dss(size, zero); + if (ret != NULL) + goto RETURN; +#endif + ret = chunk_alloc_mmap(size); + if (ret != NULL) + goto RETURN; +#ifdef JEMALLOC_SWAP + } #endif - ret = chunk_alloc_mmap(size); - if (ret != NULL) - goto RETURN; /* All strategies for allocation failed. */ ret = NULL; @@ -360,122 +67,6 @@ RETURN: return (ret); } -#ifdef JEMALLOC_DSS -static extent_node_t * -chunk_dealloc_dss_record(void *chunk, size_t size) -{ - extent_node_t *node, *prev, key; - - key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&dss_chunks_ad, &key); - /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { - /* - * Coalesce chunk with the following address range. This does - * not change the position within dss_chunks_ad, so only - * remove/insert from/into dss_chunks_szad. - */ - extent_tree_szad_remove(&dss_chunks_szad, node); - node->addr = chunk; - node->size += size; - extent_tree_szad_insert(&dss_chunks_szad, node); - } else { - /* - * Coalescing forward failed, so insert a new node. Drop - * dss_mtx during node allocation, since it is possible that a - * new base chunk will be allocated. - */ - malloc_mutex_unlock(&dss_mtx); - node = base_node_alloc(); - malloc_mutex_lock(&dss_mtx); - if (node == NULL) - return (NULL); - node->addr = chunk; - node->size = size; - extent_tree_ad_insert(&dss_chunks_ad, node); - extent_tree_szad_insert(&dss_chunks_szad, node); - } - - /* Try to coalesce backward. */ - prev = extent_tree_ad_prev(&dss_chunks_ad, node); - if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == - chunk) { - /* - * Coalesce chunk with the previous address range. This does - * not change the position within dss_chunks_ad, so only - * remove/insert node from/into dss_chunks_szad. - */ - extent_tree_szad_remove(&dss_chunks_szad, prev); - extent_tree_ad_remove(&dss_chunks_ad, prev); - - extent_tree_szad_remove(&dss_chunks_szad, node); - node->addr = prev->addr; - node->size += prev->size; - extent_tree_szad_insert(&dss_chunks_szad, node); - - base_node_dealloc(prev); - } - - return (node); -} - -static bool -chunk_dealloc_dss(void *chunk, size_t size) -{ - - malloc_mutex_lock(&dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) { - extent_node_t *node; - - /* Try to coalesce with other unused chunks. */ - node = chunk_dealloc_dss_record(chunk, size); - if (node != NULL) { - chunk = node->addr; - size = node->size; - } - - /* Get the current end of the DSS. */ - dss_max = sbrk(0); - - /* - * Try to shrink the DSS if this chunk is at the end of the - * DSS. The sbrk() call here is subject to a race condition - * with threads that use brk(2) or sbrk(2) directly, but the - * alternative would be to leak memory for the sake of poorly - * designed multi-threaded programs. 
- */ - if ((void *)((uintptr_t)chunk + size) == dss_max - && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) { - /* Success. */ - dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size); - - if (node != NULL) { - extent_tree_szad_remove(&dss_chunks_szad, node); - extent_tree_ad_remove(&dss_chunks_ad, node); - base_node_dealloc(node); - } - malloc_mutex_unlock(&dss_mtx); - } else { - malloc_mutex_unlock(&dss_mtx); - madvise(chunk, size, MADV_DONTNEED); - } - - return (false); - } - malloc_mutex_unlock(&dss_mtx); - - return (true); -} -#endif - -static void -chunk_dealloc_mmap(void *chunk, size_t size) -{ - - pages_unmap(chunk, size); -} - void chunk_dealloc(void *chunk, size_t size) { @@ -489,10 +80,13 @@ chunk_dealloc(void *chunk, size_t size) stats_chunks.curchunks -= (size / chunksize); #endif +#ifdef JEMALLOC_SWAP + if (swap_enabled && chunk_dealloc_swap(chunk, size) == false) + return; +#endif #ifdef JEMALLOC_DSS if (chunk_dealloc_dss(chunk, size) == false) return; - #endif chunk_dealloc_mmap(chunk, size); } @@ -511,14 +105,13 @@ chunk_boot(void) memset(&stats_chunks, 0, sizeof(chunk_stats_t)); #endif -#ifdef JEMALLOC_DSS - if (malloc_mutex_init(&dss_mtx)) +#ifdef JEMALLOC_SWAP + if (chunk_swap_boot()) + return (true); +#endif +#ifdef JEMALLOC_DSS + if (chunk_dss_boot()) return (true); - dss_base = sbrk(0); - dss_prev = dss_base; - dss_max = dss_base; - extent_tree_szad_new(&dss_chunks_szad); - extent_tree_ad_new(&dss_chunks_ad); #endif return (false); diff --git a/jemalloc/src/jemalloc_chunk_dss.c b/jemalloc/src/jemalloc_chunk_dss.c new file mode 100644 index 00000000..4a4bb5f8 --- /dev/null +++ b/jemalloc/src/jemalloc_chunk_dss.c @@ -0,0 +1,267 @@ +#define JEMALLOC_CHUNK_DSS_C_ +#include "internal/jemalloc_internal.h" +#ifdef JEMALLOC_DSS +/******************************************************************************/ +/* Data. */ + +malloc_mutex_t dss_mtx; + +/* Base address of the DSS. */ +static void *dss_base; +/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ +static void *dss_prev; +/* Current upper limit on DSS addresses. */ +static void *dss_max; + +/* + * Trees of chunks that were previously allocated (trees differ only in node + * ordering). These are used when allocating chunks, in an attempt to re-use + * address space. Depending on function, different tree orderings are needed, + * which is why there are two trees with the same contents. + */ +static extent_tree_t dss_chunks_szad; +static extent_tree_t dss_chunks_ad; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void *chunk_recycle_dss(size_t size, bool zero); +static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size); + +/******************************************************************************/ + +static void * +chunk_recycle_dss(size_t size, bool zero) +{ + extent_node_t *node, key; + + key.addr = NULL; + key.size = size; + malloc_mutex_lock(&dss_mtx); + node = extent_tree_szad_nsearch(&dss_chunks_szad, &key); + if (node != NULL) { + void *ret = node->addr; + + /* Remove node from the tree. */ + extent_tree_szad_remove(&dss_chunks_szad, node); + if (node->size == size) { + extent_tree_ad_remove(&dss_chunks_ad, node); + base_node_dealloc(node); + } else { + /* + * Insert the remainder of node's address range as a + * smaller chunk. Its position within dss_chunks_ad + * does not change. 
+ */ + assert(node->size > size); + node->addr = (void *)((uintptr_t)node->addr + size); + node->size -= size; + extent_tree_szad_insert(&dss_chunks_szad, node); + } + malloc_mutex_unlock(&dss_mtx); + + if (zero) + memset(ret, 0, size); + return (ret); + } + malloc_mutex_unlock(&dss_mtx); + + return (NULL); +} + +void * +chunk_alloc_dss(size_t size, bool zero) +{ + void *ret; + + ret = chunk_recycle_dss(size, zero); + if (ret != NULL) + return (ret); + + /* + * sbrk() uses a signed increment argument, so take care not to + * interpret a huge allocation request as a negative increment. + */ + if ((intptr_t)size < 0) + return (NULL); + + malloc_mutex_lock(&dss_mtx); + if (dss_prev != (void *)-1) { + intptr_t incr; + + /* + * The loop is necessary to recover from races with other + * threads that are using the DSS for something other than + * malloc. + */ + do { + /* Get the current end of the DSS. */ + dss_max = sbrk(0); + + /* + * Calculate how much padding is necessary to + * chunk-align the end of the DSS. + */ + incr = (intptr_t)size + - (intptr_t)CHUNK_ADDR2OFFSET(dss_max); + if (incr == (intptr_t)size) + ret = dss_max; + else { + ret = (void *)((intptr_t)dss_max + incr); + incr += size; + } + + dss_prev = sbrk(incr); + if (dss_prev == dss_max) { + /* Success. */ + dss_max = (void *)((intptr_t)dss_prev + incr); + malloc_mutex_unlock(&dss_mtx); + return (ret); + } + } while (dss_prev != (void *)-1); + } + malloc_mutex_unlock(&dss_mtx); + + return (NULL); +} + +static extent_node_t * +chunk_dealloc_dss_record(void *chunk, size_t size) +{ + extent_node_t *xnode, *node, *prev, key; + + xnode = NULL; + while (true) { + key.addr = (void *)((uintptr_t)chunk + size); + node = extent_tree_ad_nsearch(&dss_chunks_ad, &key); + /* Try to coalesce forward. */ + if (node != NULL && node->addr == key.addr) { + /* + * Coalesce chunk with the following address range. + * This does not change the position within + * dss_chunks_ad, so only remove/insert from/into + * dss_chunks_szad. + */ + extent_tree_szad_remove(&dss_chunks_szad, node); + node->addr = chunk; + node->size += size; + extent_tree_szad_insert(&dss_chunks_szad, node); + break; + } else if (xnode == NULL) { + /* + * It is possible that base_node_alloc() will cause a + * new base chunk to be allocated, so take care not to + * deadlock on dss_mtx, and recover if another thread + * deallocates an adjacent chunk while this one is busy + * allocating xnode. + */ + malloc_mutex_unlock(&dss_mtx); + xnode = base_node_alloc(); + malloc_mutex_lock(&dss_mtx); + if (xnode == NULL) + return (NULL); + } else { + /* Coalescing forward failed, so insert a new node. */ + node = xnode; + xnode = NULL; + node->addr = chunk; + node->size = size; + extent_tree_ad_insert(&dss_chunks_ad, node); + extent_tree_szad_insert(&dss_chunks_szad, node); + break; + } + } + /* Discard xnode if it ended up unused do to a race. */ + if (xnode != NULL) + base_node_dealloc(xnode); + + /* Try to coalesce backward. */ + prev = extent_tree_ad_prev(&dss_chunks_ad, node); + if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == + chunk) { + /* + * Coalesce chunk with the previous address range. This does + * not change the position within dss_chunks_ad, so only + * remove/insert node from/into dss_chunks_szad. 
+ */ + extent_tree_szad_remove(&dss_chunks_szad, prev); + extent_tree_ad_remove(&dss_chunks_ad, prev); + + extent_tree_szad_remove(&dss_chunks_szad, node); + node->addr = prev->addr; + node->size += prev->size; + extent_tree_szad_insert(&dss_chunks_szad, node); + + base_node_dealloc(prev); + } + + return (node); +} + +bool +chunk_dealloc_dss(void *chunk, size_t size) +{ + bool ret; + + malloc_mutex_lock(&dss_mtx); + if ((uintptr_t)chunk >= (uintptr_t)dss_base + && (uintptr_t)chunk < (uintptr_t)dss_max) { + extent_node_t *node; + + /* Try to coalesce with other unused chunks. */ + node = chunk_dealloc_dss_record(chunk, size); + if (node != NULL) { + chunk = node->addr; + size = node->size; + } + + /* Get the current end of the DSS. */ + dss_max = sbrk(0); + + /* + * Try to shrink the DSS if this chunk is at the end of the + * DSS. The sbrk() call here is subject to a race condition + * with threads that use brk(2) or sbrk(2) directly, but the + * alternative would be to leak memory for the sake of poorly + * designed multi-threaded programs. + */ + if ((void *)((uintptr_t)chunk + size) == dss_max + && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) { + /* Success. */ + dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size); + + if (node != NULL) { + extent_tree_szad_remove(&dss_chunks_szad, node); + extent_tree_ad_remove(&dss_chunks_ad, node); + base_node_dealloc(node); + } + } else + madvise(chunk, size, MADV_DONTNEED); + + ret = false; + goto RETURN; + } + + ret = true; +RETURN: + malloc_mutex_unlock(&dss_mtx); + return (ret); +} + +bool +chunk_dss_boot(void) +{ + + if (malloc_mutex_init(&dss_mtx)) + return (true); + dss_base = sbrk(0); + dss_prev = dss_base; + dss_max = dss_base; + extent_tree_szad_new(&dss_chunks_szad); + extent_tree_ad_new(&dss_chunks_ad); + + return (false); +} + +/******************************************************************************/ +#endif /* JEMALLOC_DSS */ diff --git a/jemalloc/src/jemalloc_chunk_mmap.c b/jemalloc/src/jemalloc_chunk_mmap.c new file mode 100644 index 00000000..8e2c8048 --- /dev/null +++ b/jemalloc/src/jemalloc_chunk_mmap.c @@ -0,0 +1,198 @@ +#define JEMALLOC_CHUNK_MMAP_C_ +#include "internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +/* + * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and + * potentially avoid some system calls. We can get away without TLS here, + * since the state of mmap_unaligned only affects performance, rather than + * correct function. + */ +static +#ifndef NO_TLS + __thread +#endif + bool mmap_unaligned +#ifndef NO_TLS + JEMALLOC_ATTR(tls_model("initial-exec")) +#endif + ; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void *pages_map(void *addr, size_t size); +static void pages_unmap(void *addr, size_t size); +static void *chunk_alloc_mmap_slow(size_t size, bool unaligned); + +/******************************************************************************/ + +static void * +pages_map(void *addr, size_t size) +{ + void *ret; + + /* + * We don't use MAP_FIXED here, because it can cause the *replacement* + * of existing mappings, and we only want to create new mappings.
+ */ + ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, + -1, 0); + assert(ret != NULL); + + if (ret == MAP_FAILED) + ret = NULL; + else if (addr != NULL && ret != addr) { + /* + * We succeeded in mapping memory, but not in the right place. + */ + if (munmap(ret, size) == -1) { + char buf[STRERROR_BUF]; + + strerror_r(errno, buf, sizeof(buf)); + malloc_write4("", ": Error in munmap(): ", + buf, "\n"); + if (opt_abort) + abort(); + } + ret = NULL; + } + + assert(ret == NULL || (addr == NULL && ret != addr) + || (addr != NULL && ret == addr)); + return (ret); +} + +static void +pages_unmap(void *addr, size_t size) +{ + + if (munmap(addr, size) == -1) { + char buf[STRERROR_BUF]; + + strerror_r(errno, buf, sizeof(buf)); + malloc_write4("", ": Error in munmap(): ", buf, "\n"); + if (opt_abort) + abort(); + } +} + +static void * +chunk_alloc_mmap_slow(size_t size, bool unaligned) +{ + void *ret; + size_t offset; + + /* Beware size_t wrap-around. */ + if (size + chunksize <= size) + return (NULL); + + ret = pages_map(NULL, size + chunksize); + if (ret == NULL) + return (NULL); + + /* Clean up unneeded leading/trailing space. */ + offset = CHUNK_ADDR2OFFSET(ret); + if (offset != 0) { + /* Note that mmap() returned an unaligned mapping. */ + unaligned = true; + + /* Leading space. */ + pages_unmap(ret, chunksize - offset); + + ret = (void *)((uintptr_t)ret + + (chunksize - offset)); + + /* Trailing space. */ + pages_unmap((void *)((uintptr_t)ret + size), + offset); + } else { + /* Trailing space only. */ + pages_unmap((void *)((uintptr_t)ret + size), + chunksize); + } + + /* + * If mmap() returned an aligned mapping, reset mmap_unaligned so that + * the next chunk_alloc_mmap() execution tries the fast allocation + * method. + */ + if (unaligned == false) + mmap_unaligned = false; + + return (ret); +} + +void * +chunk_alloc_mmap(size_t size) +{ + void *ret; + + /* + * Ideally, there would be a way to specify alignment to mmap() (like + * NetBSD has), but in the absence of such a feature, we have to work + * hard to efficiently create aligned mappings. The reliable, but + * slow method is to create a mapping that is over-sized, then trim the + * excess. However, that always results in at least one call to + * pages_unmap(). + * + * A more optimistic approach is to try mapping precisely the right + * amount, then try to append another mapping if alignment is off. In + * practice, this works out well as long as the application is not + * interleaving mappings via direct mmap() calls. If we do run into a + * situation where there is an interleaved mapping and we are unable to + * extend an unaligned mapping, our best option is to switch to the + * slow method until mmap() returns another aligned mapping. This will + * tend to leave a gap in the memory map that is too small to cause + * later problems for the optimistic method. + * + * Another possible confounding factor is address space layout + * randomization (ASLR), which causes mmap(2) to disregard the + * requested address. mmap_unaligned tracks whether the previous + * chunk_alloc_mmap() execution received any unaligned or relocated + * mappings, and if so, the current execution will immediately fall + * back to the slow method. However, we keep track of whether the fast + * method would have succeeded, and if so, we make a note to try the + * fast method next time. 
+ */ + + if (mmap_unaligned == false) { + size_t offset; + + ret = pages_map(NULL, size); + if (ret == NULL) + return (NULL); + + offset = CHUNK_ADDR2OFFSET(ret); + if (offset != 0) { + mmap_unaligned = true; + /* Try to extend chunk boundary. */ + if (pages_map((void *)((uintptr_t)ret + size), + chunksize - offset) == NULL) { + /* + * Extension failed. Clean up, then revert to + * the reliable-but-expensive method. + */ + pages_unmap(ret, size); + ret = chunk_alloc_mmap_slow(size, true); + } else { + /* Clean up unneeded leading space. */ + pages_unmap(ret, chunksize - offset); + ret = (void *)((uintptr_t)ret + (chunksize - + offset)); + } + } + } + ret = chunk_alloc_mmap_slow(size, false); + + return (ret); +} + +void +chunk_dealloc_mmap(void *chunk, size_t size) +{ + + pages_unmap(chunk, size); +} diff --git a/jemalloc/src/jemalloc_chunk_swap.c b/jemalloc/src/jemalloc_chunk_swap.c new file mode 100644 index 00000000..a0cb40b0 --- /dev/null +++ b/jemalloc/src/jemalloc_chunk_swap.c @@ -0,0 +1,354 @@ +#define JEMALLOC_CHUNK_SWAP_C_ +#include "internal/jemalloc_internal.h" +#ifdef JEMALLOC_SWAP +/******************************************************************************/ +/* Data. */ + +malloc_mutex_t swap_mtx; +bool swap_enabled; +#ifdef JEMALLOC_STATS +size_t swap_avail; +#endif + +static bool swap_prezeroed; + +/* Base address of the mmap()ed file(s). */ +static void *swap_base; +/* Current end of the space in use (<= swap_max). */ +static void *swap_end; +/* Absolute upper limit on file-backed addresses. */ +static void *swap_max; + +/* + * Trees of chunks that were previously allocated (trees differ only in node + * ordering). These are used when allocating chunks, in an attempt to re-use + * address space. Depending on function, different tree orderings are needed, + * which is why there are two trees with the same contents. + */ +static extent_tree_t swap_chunks_szad; +static extent_tree_t swap_chunks_ad; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void *chunk_recycle_swap(size_t size, bool zero); +static extent_node_t *chunk_dealloc_swap_record(void *chunk, size_t size); + +/******************************************************************************/ + +static void * +chunk_recycle_swap(size_t size, bool zero) +{ + extent_node_t *node, key; + + key.addr = NULL; + key.size = size; + malloc_mutex_lock(&swap_mtx); + node = extent_tree_szad_nsearch(&swap_chunks_szad, &key); + if (node != NULL) { + void *ret = node->addr; + + /* Remove node from the tree. */ + extent_tree_szad_remove(&swap_chunks_szad, node); + if (node->size == size) { + extent_tree_ad_remove(&swap_chunks_ad, node); + base_node_dealloc(node); + } else { + /* + * Insert the remainder of node's address range as a + * smaller chunk. Its position within swap_chunks_ad + * does not change. 
+ */ + assert(node->size > size); + node->addr = (void *)((uintptr_t)node->addr + size); + node->size -= size; + extent_tree_szad_insert(&swap_chunks_szad, node); + } +#ifdef JEMALLOC_STATS + swap_avail -= size; +#endif + malloc_mutex_unlock(&swap_mtx); + + if (zero) + memset(ret, 0, size); + return (ret); + } + malloc_mutex_unlock(&swap_mtx); + + return (NULL); +} + +void * +chunk_alloc_swap(size_t size, bool zero) +{ + void *ret; + + assert(swap_enabled); + + ret = chunk_recycle_swap(size, zero); + if (ret != NULL) + return (ret); + + malloc_mutex_lock(&swap_mtx); + if ((uintptr_t)swap_end + size <= (uintptr_t)swap_max) { + ret = swap_end; + swap_end = (void *)((uintptr_t)swap_end + size); +#ifdef JEMALLOC_STATS + swap_avail -= size; +#endif + malloc_mutex_unlock(&swap_mtx); + + if (zero && swap_prezeroed == false) + memset(ret, 0, size); + } else { + malloc_mutex_unlock(&swap_mtx); + return (NULL); + } + + return (ret); +} + +static extent_node_t * +chunk_dealloc_swap_record(void *chunk, size_t size) +{ + extent_node_t *xnode, *node, *prev, key; + + xnode = NULL; + while (true) { + key.addr = (void *)((uintptr_t)chunk + size); + node = extent_tree_ad_nsearch(&swap_chunks_ad, &key); + /* Try to coalesce forward. */ + if (node != NULL && node->addr == key.addr) { + /* + * Coalesce chunk with the following address range. + * This does not change the position within + * swap_chunks_ad, so only remove/insert from/into + * swap_chunks_szad. + */ + extent_tree_szad_remove(&swap_chunks_szad, node); + node->addr = chunk; + node->size += size; + extent_tree_szad_insert(&swap_chunks_szad, node); + break; + } else if (xnode == NULL) { + /* + * It is possible that base_node_alloc() will cause a + * new base chunk to be allocated, so take care not to + * deadlock on swap_mtx, and recover if another thread + * deallocates an adjacent chunk while this one is busy + * allocating xnode. + */ + malloc_mutex_unlock(&swap_mtx); + xnode = base_node_alloc(); + malloc_mutex_lock(&swap_mtx); + if (xnode == NULL) + return (NULL); + } else { + /* Coalescing forward failed, so insert a new node. */ + node = xnode; + xnode = NULL; + node->addr = chunk; + node->size = size; + extent_tree_ad_insert(&swap_chunks_ad, node); + extent_tree_szad_insert(&swap_chunks_szad, node); + break; + } + } + /* Discard xnode if it ended up unused do to a race. */ + if (xnode != NULL) + base_node_dealloc(xnode); + + /* Try to coalesce backward. */ + prev = extent_tree_ad_prev(&swap_chunks_ad, node); + if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == + chunk) { + /* + * Coalesce chunk with the previous address range. This does + * not change the position within swap_chunks_ad, so only + * remove/insert node from/into swap_chunks_szad. + */ + extent_tree_szad_remove(&swap_chunks_szad, prev); + extent_tree_ad_remove(&swap_chunks_ad, prev); + + extent_tree_szad_remove(&swap_chunks_szad, node); + node->addr = prev->addr; + node->size += prev->size; + extent_tree_szad_insert(&swap_chunks_szad, node); + + base_node_dealloc(prev); + } + + return (node); +} + +bool +chunk_dealloc_swap(void *chunk, size_t size) +{ + bool ret; + + assert(swap_enabled); + + malloc_mutex_lock(&swap_mtx); + if ((uintptr_t)chunk >= (uintptr_t)swap_base + && (uintptr_t)chunk < (uintptr_t)swap_max) { + extent_node_t *node; + + /* Try to coalesce with other unused chunks. 
*/ + node = chunk_dealloc_swap_record(chunk, size); + if (node != NULL) { + chunk = node->addr; + size = node->size; + } + + /* + * Try to shrink the in-use memory if this chunk is at the end + * of the in-use memory. + */ + if ((void *)((uintptr_t)chunk + size) == swap_end) { + swap_end = (void *)((uintptr_t)swap_end - size); + + if (node != NULL) { + extent_tree_szad_remove(&swap_chunks_szad, + node); + extent_tree_ad_remove(&swap_chunks_ad, node); + base_node_dealloc(node); + } + } else + madvise(chunk, size, MADV_DONTNEED); + + ret = false; + goto RETURN; + } + + ret = true; +RETURN: +#ifdef JEMALLOC_STATS + swap_avail += size; +#endif + malloc_mutex_unlock(&swap_mtx); + return (ret); +} + +bool +chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) +{ + bool ret; + unsigned i; + off_t off; + void *vaddr; + size_t cumsize, voff; + size_t sizes[nfds]; + + malloc_mutex_lock(&swap_mtx); + + /* Get file sizes. */ + for (i = 0, cumsize = 0; i < nfds; i++) { + off = lseek(fds[i], 0, SEEK_END); + if (off == ((off_t)-1)) { + ret = true; + goto RETURN; + } + if (PAGE_CEILING(off) != off) { + /* Truncate to a multiple of the page size. */ + off &= ~PAGE_MASK; + if (ftruncate(fds[i], off) != 0) { + ret = true; + goto RETURN; + } + } + sizes[i] = off; + if (cumsize + off < cumsize) { + /* + * Cumulative file size is greater than the total + * address space. Bail out while it's still obvious + * what the problem is. + */ + ret = true; + goto RETURN; + } + cumsize += off; + } + + /* Round down to a multiple of the chunk size. */ + cumsize &= ~chunksize_mask; + if (cumsize == 0) { + ret = true; + goto RETURN; + } + + /* + * Allocate a chunk-aligned region of anonymous memory, which will + * be the final location for the memory-mapped files. + */ + vaddr = chunk_alloc_mmap(cumsize); + if (vaddr == NULL) { + ret = true; + goto RETURN; + } + + /* Overlay the files onto the anonymous mapping. 
*/ + for (i = 0, voff = 0; i < nfds; i++) { + void *addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i], + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0); + if (addr == MAP_FAILED) { + char buf[STRERROR_BUF]; + + strerror_r(errno, buf, sizeof(buf)); + malloc_write4("", + ": Error in mmap(..., MAP_FIXED, ...): ", + buf, "\n"); + if (opt_abort) + abort(); + if (munmap(vaddr, voff) == -1) { + strerror_r(errno, buf, sizeof(buf)); + malloc_write4("", + ": Error in munmap(): ", buf, "\n"); + } + ret = true; + goto RETURN; + } + assert(addr == (void *)((uintptr_t)vaddr + voff)); + voff += sizes[i]; + } + + swap_prezeroed = prezeroed; + swap_base = vaddr; + swap_end = swap_base; + swap_max = (void *)((uintptr_t)vaddr + cumsize); + + swap_enabled = true; + +#ifdef JEMALLOC_STATS + swap_avail = cumsize; +#endif + + ret = false; +RETURN: + malloc_mutex_unlock(&swap_mtx); + return (ret); +} + +bool +chunk_swap_boot(void) +{ + + if (malloc_mutex_init(&swap_mtx)) + return (true); + + swap_enabled = false; +#ifdef JEMALLOC_STATS + swap_avail = 0; +#endif + swap_prezeroed = false; + swap_base = NULL; + swap_end = NULL; + swap_max = NULL; + + extent_tree_szad_new(&swap_chunks_szad); + extent_tree_ad_new(&swap_chunks_ad); + + return (false); +} + +/******************************************************************************/ +#endif /* JEMALLOC_SWAP */ diff --git a/jemalloc/src/jemalloc_defs.h.in b/jemalloc/src/jemalloc_defs.h.in index f43d4759..393834b9 100644 --- a/jemalloc/src/jemalloc_defs.h.in +++ b/jemalloc/src/jemalloc_defs.h.in @@ -67,6 +67,9 @@ */ #undef JEMALLOC_DSS +/* JEMALLOC_SWAP enables mmap()ed swap file support. */ +#undef JEMALLOC_SWAP + /* Support memory filling (junk/zero). */ #undef JEMALLOC_FILL diff --git a/jemalloc/src/jemalloc_extent.c b/jemalloc/src/jemalloc_extent.c index cbe7c4b0..1aa96a72 100644 --- a/jemalloc/src/jemalloc_extent.c +++ b/jemalloc/src/jemalloc_extent.c @@ -3,7 +3,7 @@ /******************************************************************************/ -#ifdef JEMALLOC_DSS +#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) static inline int extent_szad_comp(extent_node_t *a, extent_node_t *b) { diff --git a/jemalloc/src/jemalloc_huge.c b/jemalloc/src/jemalloc_huge.c index b325927e..78551795 100644 --- a/jemalloc/src/jemalloc_huge.c +++ b/jemalloc/src/jemalloc_huge.c @@ -208,7 +208,7 @@ huge_dalloc(void *ptr) /* Unmap chunk. */ #ifdef JEMALLOC_FILL -#ifdef JEMALLOC_DSS +#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) if (opt_junk) memset(node->addr, 0x5a, node->size); #endif diff --git a/jemalloc/src/jemalloc_stats.c b/jemalloc/src/jemalloc_stats.c index b95c07fb..979eb796 100644 --- a/jemalloc/src/jemalloc_stats.c +++ b/jemalloc/src/jemalloc_stats.c @@ -104,13 +104,11 @@ malloc_printf(const char *format, ...) malloc_vcprintf(NULL, NULL, format, ap); va_end(ap); } - #endif -JEMALLOC_ATTR(visibility("default")) void -JEMALLOC_P(malloc_stats_print)(void (*write4)(void *, const char *, - const char *, const char *, const char *), void *w4opaque, const char *opts) +stats_print(void (*write4)(void *, const char *, const char *, const char *, + const char *), void *w4opaque, const char *opts) { char s[UMAX2S_BUFSIZE]; bool general = true; @@ -167,6 +165,9 @@ JEMALLOC_P(malloc_stats_print)(void (*write4)(void *, const char *, opt_abort ? "A" : "a", "", ""); #ifdef JEMALLOC_FILL write4(w4opaque, opt_junk ? "J" : "j", "", "", ""); +#endif +#ifdef JEMALLOC_SWAP + write4(w4opaque, opt_overcommit ? 
"O" : "o", "", "", ""); #endif write4(w4opaque, "P", "", "", ""); #ifdef JEMALLOC_TCACHE @@ -271,10 +272,6 @@ JEMALLOC_P(malloc_stats_print)(void (*write4)(void *, const char *, mapped = stats_chunks.curchunks * chunksize; malloc_mutex_unlock(&huge_mtx); - malloc_mutex_lock(&base_mtx); - mapped += base_mapped; - malloc_mutex_unlock(&base_mtx); - malloc_cprintf(write4, w4opaque, "Allocated: %zu, mapped: %zu\n", allocated, mapped); @@ -287,10 +284,22 @@ JEMALLOC_P(malloc_stats_print)(void (*write4)(void *, const char *, malloc_mutex_unlock(&huge_mtx); malloc_cprintf(write4, w4opaque, "chunks: nchunks " - "highchunks curchunks\n"); - malloc_cprintf(write4, w4opaque, " %13llu%13lu%13lu\n", + "highchunks curchunks" +#ifdef JEMALLOC_SWAP + " swap_avail" +#endif + "\n"); + malloc_cprintf(write4, w4opaque, " %13llu%13lu%13lu" +#ifdef JEMALLOC_SWAP + "%13zu" +#endif + "\n", chunks_stats.nchunks, chunks_stats.highchunks, - chunks_stats.curchunks); + chunks_stats.curchunks +#ifdef JEMALLOC_SWAP + , (swap_avail >> opt_lg_chunk) +#endif + ); } /* Print chunk stats. */