2010-01-17 01:53:50 +08:00
|
|
|
/******************************************************************************/
|
|
|
|
#ifdef JEMALLOC_H_TYPES
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Size and alignment of memory chunks that are allocated by the OS's virtual
|
|
|
|
* memory system.
|
|
|
|
*/
|
2015-03-07 12:05:16 +08:00
|
|
|
/*
 * NOTE(review): presumably the base-2 logarithm of the default chunk size
 * (i.e. 2^18 bytes) -- confirm where opt_lg_chunk is initialized.
 */
#define LG_CHUNK_DEFAULT 18
|
2010-01-17 01:53:50 +08:00
|
|
|
|
|
|
|
/*
 * Return the chunk address for allocation address a.
 *
 * The result is a with every bit that is set in chunksize_mask cleared,
 * converted back to a void *.  The argument is evaluated exactly once.
 */
#define CHUNK_ADDR2BASE(a) \
	((void *)((uintptr_t)(a) & ~chunksize_mask))
|
|
|
|
|
|
|
|
/*
 * Return the chunk offset of address a.
 *
 * The result is the bits of a selected by chunksize_mask, as a size_t.  The
 * argument is evaluated exactly once.
 */
#define CHUNK_ADDR2OFFSET(a) \
	((size_t)((uintptr_t)(a) & chunksize_mask))
|
|
|
|
|
|
|
|
/*
 * Return the smallest chunk multiple that is >= s.
 *
 * Implemented as add-then-mask with chunksize_mask.  The addition is not
 * checked: if (s) + chunksize_mask wraps around, the result is smaller than
 * s, so callers that can receive very large sizes must detect that
 * themselves.  The argument is evaluated exactly once.
 */
#define CHUNK_CEILING(s) \
	(((s) + chunksize_mask) & ~chunksize_mask)
|
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_TYPES */
|
|
|
|
/******************************************************************************/
|
|
|
|
#ifdef JEMALLOC_H_STRUCTS
|
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_STRUCTS */
|
|
|
|
/******************************************************************************/
|
|
|
|
#ifdef JEMALLOC_H_EXTERNS
|
|
|
|
|
2010-01-24 18:53:40 +08:00
|
|
|
extern size_t opt_lg_chunk;
|
2012-10-12 04:53:15 +08:00
|
|
|
extern const char *opt_dss;
|
2010-01-24 18:53:40 +08:00
|
|
|
|
2015-01-31 14:54:08 +08:00
|
|
|
extern rtree_t chunks_rtree;
|
2010-09-06 01:35:13 +08:00
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
extern size_t chunksize;
|
|
|
|
extern size_t chunksize_mask; /* (chunksize - 1). */
|
|
|
|
extern size_t chunk_npages;
|
|
|
|
|
Move centralized chunk management into arenas.
Migrate all centralized data structures related to huge allocations and
recyclable chunks into arena_t, so that each arena can manage huge
allocations and recyclable virtual memory completely independently of
other arenas.
Add chunk node caching to arenas, in order to avoid contention on the
base allocator.
Use chunks_rtree to look up huge allocations rather than a red-black
tree. Maintain a per arena unsorted list of huge allocations (which
will be needed to enumerate huge allocations during arena reset).
Remove the --enable-ivsalloc option, make ivsalloc() always available,
and use it for size queries if --enable-debug is enabled. The only
practical implications to this removal are that 1) ivsalloc() is now
always available during live debugging (and the underlying radix tree is
available during core-based debugging), and 2) size query validation can
no longer be enabled independent of --enable-debug.
Remove the stats.chunks.{current,total,high} mallctls, and replace their
underlying statistics with simpler atomically updated counters used
exclusively for gdump triggering. These statistics are no longer very
useful because each arena manages chunks independently, and per arena
statistics provide similar information.
Simplify chunk synchronization code, now that base chunk allocation
cannot cause recursive lock acquisition.
2015-02-12 04:24:27 +08:00
|
|
|
bool chunk_register(const void *chunk, const extent_node_t *node);
|
|
|
|
void chunk_deregister(const void *chunk, const extent_node_t *node);
|
2014-05-16 13:22:27 +08:00
|
|
|
void *chunk_alloc_base(size_t size);
|
2015-02-19 08:40:53 +08:00
|
|
|
void *chunk_alloc_cache(arena_t *arena, void *new_addr, size_t size,
|
|
|
|
size_t alignment, bool *zero, bool dalloc_node);
|
Attempt to expand huge allocations in-place.
This adds support for expanding huge allocations in-place by requesting
memory at a specific address from the chunk allocator.
It's currently only implemented for the chunk recycling path, although
in theory it could also be done by optimistically allocating new chunks.
On Linux, it could attempt an in-place mremap. However, that won't work
in practice since the heap is grown downwards and memory is not unmapped
(in a normal build, at least).
Repeated vector reallocation micro-benchmark:
#include <string.h>
#include <stdlib.h>
int main(void) {
for (size_t i = 0; i < 100; i++) {
void *ptr = NULL;
size_t old_size = 0;
for (size_t size = 4; size < (1 << 30); size *= 2) {
ptr = realloc(ptr, size);
if (!ptr) return 1;
memset(ptr + old_size, 0xff, size - old_size);
old_size = size;
}
free(ptr);
}
}
The glibc allocator fails to do any in-place reallocations on this
benchmark once it passes the M_MMAP_THRESHOLD (default 128k) but it
elides the cost of copies via mremap, which is currently not something
that jemalloc can use.
With this improvement, jemalloc still fails to do any in-place huge
reallocations for the first outer loop, but then succeeds 100% of the
time for the remaining 99 iterations. The time spent doing allocations
and copies drops down to under 5%, with nearly all of it spent doing
purging + faulting (when huge pages are disabled) and the array memset.
An improved mremap API (MREMAP_RETAIN - #138) would be far more general
but this is a portable optimization and would still be useful on Linux
for xallocx.
Numbers with transparent huge pages enabled:
glibc (copies elided via MREMAP_MAYMOVE): 8.471s
jemalloc: 17.816s
jemalloc + no-op madvise: 13.236s
jemalloc + this commit: 6.787s
jemalloc + this commit + no-op madvise: 6.144s
Numbers with transparent huge pages disabled:
glibc (copies elided via MREMAP_MAYMOVE): 15.403s
jemalloc: 39.456s
jemalloc + no-op madvise: 12.768s
jemalloc + this commit: 15.534s
jemalloc + this commit + no-op madvise: 6.354s
Closes #137
2014-10-04 13:39:32 +08:00
|
|
|
void *chunk_alloc_default(void *new_addr, size_t size, size_t alignment,
|
|
|
|
bool *zero, unsigned arena_ind);
|
2015-02-19 08:40:53 +08:00
|
|
|
void *chunk_alloc_wrapper(arena_t *arena, chunk_alloc_t *chunk_alloc,
|
|
|
|
void *new_addr, size_t size, size_t alignment, bool *zero);
|
2015-02-16 10:04:46 +08:00
|
|
|
void chunk_record(arena_t *arena, extent_tree_t *chunks_szad,
|
2015-02-18 17:15:50 +08:00
|
|
|
extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size,
|
|
|
|
bool zeroed);
|
2015-02-19 08:40:53 +08:00
|
|
|
void chunk_dalloc_cache(arena_t *arena, void *chunk, size_t size);
|
|
|
|
void chunk_dalloc_arena(arena_t *arena, void *chunk, size_t size,
|
|
|
|
bool zeroed);
|
2014-05-16 13:22:27 +08:00
|
|
|
bool chunk_dalloc_default(void *chunk, size_t size, unsigned arena_ind);
|
2015-02-19 08:40:53 +08:00
|
|
|
void chunk_dalloc_wrapper(arena_t *arena, chunk_dalloc_t *chunk_dalloc,
|
|
|
|
void *chunk, size_t size);
|
2012-04-22 10:17:21 +08:00
|
|
|
bool chunk_boot(void);
|
2012-10-10 05:46:22 +08:00
|
|
|
void chunk_prefork(void);
|
|
|
|
void chunk_postfork_parent(void);
|
|
|
|
void chunk_postfork_child(void);
|
2010-01-17 01:53:50 +08:00
|
|
|
|
|
|
|
#endif /* JEMALLOC_H_EXTERNS */
|
|
|
|
/******************************************************************************/
|
|
|
|
#ifdef JEMALLOC_H_INLINES
|
|
|
|
|
Move centralized chunk management into arenas.
Migrate all centralized data structures related to huge allocations and
recyclable chunks into arena_t, so that each arena can manage huge
allocations and recyclable virtual memory completely independently of
other arenas.
Add chunk node caching to arenas, in order to avoid contention on the
base allocator.
Use chunks_rtree to look up huge allocations rather than a red-black
tree. Maintain a per arena unsorted list of huge allocations (which
will be needed to enumerate huge allocations during arena reset).
Remove the --enable-ivsalloc option, make ivsalloc() always available,
and use it for size queries if --enable-debug is enabled. The only
practical implications to this removal are that 1) ivsalloc() is now
always available during live debugging (and the underlying radix tree is
available during core-based debugging), and 2) size query validation can
no longer be enabled independent of --enable-debug.
Remove the stats.chunks.{current,total,high} mallctls, and replace their
underlying statistics with simpler atomically updated counters used
exclusively for gdump triggering. These statistics are no longer very
useful because each arena manages chunks independently, and per arena
statistics provide similar information.
Simplify chunk synchronization code, now that base chunk allocation
cannot cause recursive lock acquisition.
2015-02-12 04:24:27 +08:00
|
|
|
#ifndef JEMALLOC_ENABLE_INLINE
|
|
|
|
extent_node_t *chunk_lookup(const void *chunk);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_))
|
|
|
|
/*
 * Query the global chunks_rtree using the address of chunk as the key and
 * hand back whatever rtree_get() yields for that key.
 */
JEMALLOC_INLINE extent_node_t *
chunk_lookup(const void *chunk)
{
	const uintptr_t key = (uintptr_t)chunk;
	extent_node_t *node = rtree_get(&chunks_rtree, key);

	return (node);
}
|
|
|
|
#endif
|
|
|
|
|
2010-01-17 01:53:50 +08:00
|
|
|
#endif /* JEMALLOC_H_INLINES */
|
|
|
|
/******************************************************************************/
|
2010-01-24 18:53:40 +08:00
|
|
|
|
2010-02-12 06:45:59 +08:00
|
|
|
#include "jemalloc/internal/chunk_dss.h"
|
|
|
|
#include "jemalloc/internal/chunk_mmap.h"
|