Port to Mac OS X.

Add Mac OS X support, based in large part on the OS X support in
Mozilla's version of jemalloc.
Author: Jason Evans
Date:   2010-09-05 10:35:13 -07:00
parent b267d0f86a
commit 2dbecf1f62
26 changed files with 1146 additions and 290 deletions

View File

@@ -39,13 +39,17 @@ extern malloc_mutex_t chunks_mtx;
extern chunk_stats_t stats_chunks;
#endif
#ifdef JEMALLOC_IVSALLOC
extern rtree_t *chunks_rtree;
#endif
extern size_t chunksize;
extern size_t chunksize_mask; /* (chunksize - 1). */
extern size_t chunk_npages;
extern size_t arena_chunk_header_npages;
extern size_t arena_maxclass; /* Max size class for arenas. */
-void *chunk_alloc(size_t size, bool *zero);
+void *chunk_alloc(size_t size, bool base, bool *zero);
void chunk_dealloc(void *chunk, size_t size);
bool chunk_boot(void);
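
For illustration, a hypothetical call-site sketch of the new signature (not part of this commit). The `base` flag appears to mark chunks allocated on behalf of the base allocator, which must bypass chunks_rtree registration: rtree nodes are themselves carved out of base memory, so recording base chunks would recurse.

/* Hypothetical caller sketch; `zero' requests demand-zeroed memory. */
bool zero = false;
void *app_chunk = chunk_alloc(chunksize, false, &zero); /* tracked in chunks_rtree */
void *base_chunk = chunk_alloc(chunksize, true, &zero); /* internal; not tracked */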

View File

@@ -13,6 +13,8 @@ void *chunk_alloc_mmap(size_t size);
void *chunk_alloc_mmap_noreserve(size_t size);
void chunk_dealloc_mmap(void *chunk, size_t size);
bool chunk_mmap_boot(void);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES

View File

@@ -27,6 +27,13 @@
#define JEMALLOC_MANGLE
#include "../jemalloc@install_suffix@.h"
#ifdef JEMALLOC_ZONE
#include <mach/mach_error.h>
#include <mach/mach_init.h>
#include <mach/vm_map.h>
#include <malloc/malloc.h>
#endif
#ifdef JEMALLOC_LAZY_LOCK
#include <dlfcn.h>
#endif
@@ -159,6 +166,16 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT))
#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1))
#ifdef PAGE_SHIFT
# undef PAGE_SHIFT
#endif
#ifdef PAGE_SIZE
# undef PAGE_SIZE
#endif
#ifdef PAGE_MASK
# undef PAGE_MASK
#endif
#ifdef DYNAMIC_PAGE_SHIFT
# define PAGE_SHIFT lg_pagesize
# define PAGE_SIZE pagesize
@@ -184,9 +201,13 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/prof.h"
#ifdef JEMALLOC_ZONE
#include "jemalloc/internal/zone.h"
#endif
#undef JEMALLOC_H_TYPES
/******************************************************************************/
@@ -203,9 +224,13 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/prof.h"
#ifdef JEMALLOC_ZONE
#include "jemalloc/internal/zone.h"
#endif
#undef JEMALLOC_H_STRUCTS
/******************************************************************************/
@@ -240,8 +265,19 @@ extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */
* Map of pthread_self() --> arenas[???], used for selecting an arena to use
* for allocations.
*/
-extern __thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec"));
+extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
# define ARENA_GET() arenas_tls
# define ARENA_SET(v) do { \
arenas_tls = (v); \
} while (0)
#else
extern pthread_key_t arenas_tsd;
# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd))
# define ARENA_SET(v) do { \
pthread_setspecific(arenas_tsd, (void *)(v)); \
} while (0)
#endif
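
In the NO_TLS case the arenas_tsd key must be created once during bootstrapping before ARENA_GET()/ARENA_SET() can be used. A minimal sketch of that setup, assuming the usual pthread idiom (the actual call site is in jemalloc.c, outside this excerpt):

#ifdef NO_TLS
	/* Sketch: create the TSD slot backing ARENA_GET()/ARENA_SET(). */
	if (pthread_key_create(&arenas_tsd, NULL) != 0) {
		malloc_write("<jemalloc>: Error in pthread_key_create()\n");
		abort();
	}
#endif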
/*
* Arenas that are used to service external requests. Not all elements of the
* arenas array are necessarily used; arenas are created lazily as needed.
@@ -250,9 +286,9 @@ extern arena_t **arenas;
extern unsigned narenas;
arena_t *arenas_extend(unsigned ind);
#ifndef NO_TLS
arena_t *choose_arena_hard(void);
#endif
void jemalloc_prefork(void);
void jemalloc_postfork(void);
#include "jemalloc/internal/prn.h"
#include "jemalloc/internal/ckh.h"
@@ -265,9 +301,13 @@ arena_t *choose_arena_hard(void);
#include "jemalloc/internal/base.h"
#include "jemalloc/internal/chunk.h"
#include "jemalloc/internal/huge.h"
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/prof.h"
#ifdef JEMALLOC_ZONE
#include "jemalloc/internal/zone.h"
#endif
#undef JEMALLOC_H_EXTERNS
/******************************************************************************/
@@ -285,11 +325,30 @@ arena_t *choose_arena_hard(void);
#include "jemalloc/internal/huge.h"
#ifndef JEMALLOC_ENABLE_INLINE
size_t pow2_ceil(size_t x);
void malloc_write(const char *s);
arena_t *choose_arena(void);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
/* Compute the smallest power of 2 that is >= x. */
JEMALLOC_INLINE size_t
pow2_ceil(size_t x)
{
x--;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
#if (LG_SIZEOF_PTR == 3)
x |= x >> 32;
#endif
x++;
return (x);
}
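
The decrement-smear-increment above copies the highest set bit into every lower position, so the final increment lands exactly on the next power of 2. A standalone sanity check of the same technique, assuming a 64-bit size_t:

#include <assert.h>
#include <stddef.h>

/* Same bit-smearing technique as pow2_ceil(), reproduced for testing. */
static size_t
next_pow2(size_t x)
{
	x--;
	x |= x >> 1;  x |= x >> 2;  x |= x >> 4;
	x |= x >> 8;  x |= x >> 16; x |= x >> 32; /* 64-bit size_t assumed */
	return (x + 1);
}

int
main(void)
{
	assert(next_pow2(1) == 1);
	assert(next_pow2(3) == 4);
	assert(next_pow2(4096) == 4096);
	assert(next_pow2(4097) == 8192);
	return (0);
}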
/*
* Wrapper around malloc_message() that avoids the need for
* JEMALLOC_P(malloc_message)(...) throughout the code.
@@ -310,76 +369,33 @@ choose_arena(void)
{
arena_t *ret;
/*
* We can only use TLS if this is a PIC library, since for the static
* library version, libc's malloc is used by TLS allocation, which
* introduces a bootstrapping issue.
*/
#ifndef NO_TLS
-ret = arenas_map;
+ret = ARENA_GET();
if (ret == NULL) {
ret = choose_arena_hard();
assert(ret != NULL);
}
#else
if (isthreaded && narenas > 1) {
unsigned long ind;
/*
* Hash pthread_self() to one of the arenas. There is a prime
* number of arenas, so this has a reasonable chance of
* working. Even so, the hashing can be easily thwarted by
* inconvenient pthread_self() values. Without specific
* knowledge of how pthread_self() calculates values, we can't
* easily do much better than this.
*/
ind = (unsigned long) pthread_self() % narenas;
/*
* Optimistically assume that arenas[ind] has been initialized.
* At worst, we find out that some other thread has already
* done so, after acquiring the lock in preparation. Note that
* this lazy locking also has the effect of lazily forcing
* cache coherency; without the lock acquisition, there's no
* guarantee that modification of arenas[ind] by another thread
* would be seen on this CPU for an arbitrary amount of time.
*
* In general, this approach to modifying a synchronized value
* isn't a good idea, but in this case we only ever modify the
* value once, so things work out well.
*/
ret = arenas[ind];
if (ret == NULL) {
/*
* Avoid races with another thread that may have already
* initialized arenas[ind].
*/
malloc_mutex_lock(&arenas_lock);
if (arenas[ind] == NULL)
ret = arenas_extend((unsigned)ind);
else
ret = arenas[ind];
malloc_mutex_unlock(&arenas_lock);
}
} else
ret = arenas[0];
#endif
assert(ret != NULL);
return (ret);
}
#endif
#include "jemalloc/internal/rtree.h"
#include "jemalloc/internal/tcache.h"
#include "jemalloc/internal/arena.h"
#include "jemalloc/internal/hash.h"
#include "jemalloc/internal/prof.h"
#ifdef JEMALLOC_ZONE
#include "jemalloc/internal/zone.h"
#endif
#ifndef JEMALLOC_ENABLE_INLINE
void *imalloc(size_t size);
void *icalloc(size_t size);
void *ipalloc(size_t alignment, size_t size);
size_t isalloc(const void *ptr);
# ifdef JEMALLOC_IVSALLOC
size_t ivsalloc(const void *ptr);
# endif
void *iralloc(void *ptr, size_t size);
void idalloc(void *ptr);
#endif
@@ -526,6 +542,19 @@ isalloc(const void *ptr)
return (ret);
}
#ifdef JEMALLOC_IVSALLOC
JEMALLOC_INLINE size_t
ivsalloc(const void *ptr)
{
/* Return 0 if ptr is not within a chunk managed by jemalloc. */
if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL)
return (0);
return (isalloc(ptr));
}
#endif
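
ivsalloc() exists because the OS X zone layer may hand jemalloc arbitrary pointers owned by other allocators, so a size query has to reject foreign pointers instead of crashing in isalloc(). A sketch of the kind of zone callback that consumes it (the callback name and exact wiring are assumptions; the real glue lives in zone.c):

#include <malloc/malloc.h>

/* Sketch: OS X zone `size' callback. Returning 0 means "not our pointer". */
static size_t
zone_size_sketch(malloc_zone_t *zone, const void *ptr)
{
	return (ivsalloc(ptr));
}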
JEMALLOC_INLINE void *
iralloc(void *ptr, size_t size)
{

View File

@@ -3,6 +3,12 @@
typedef pthread_mutex_t malloc_mutex_t;
#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
#else
# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
#endif
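
The fallback matters because jemalloc declares locks that must be usable before any initialization code runs; a typical statically-initialized declaration (illustrative, though jemalloc.c contains a similar one):

/* Sketch: lock that is valid before malloc_mutex_init() could run. */
static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;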
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS

View File

@@ -0,0 +1,161 @@
/*
* This radix tree implementation is tailored to the singular purpose of
* tracking which chunks are currently owned by jemalloc. This functionality
* is mandatory for OS X, where jemalloc must be able to respond to object
* ownership queries.
*
*******************************************************************************
*/
#ifdef JEMALLOC_H_TYPES
typedef struct rtree_s rtree_t;
/*
* Size of each radix tree node (must be a power of 2). This impacts tree
* depth.
*/
#if (LG_SIZEOF_PTR == 2)
# define RTREE_NODESIZE (1U << 14)
#else
# define RTREE_NODESIZE CACHELINE
#endif
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
struct rtree_s {
malloc_mutex_t mutex;
void **root;
unsigned height;
unsigned level2bits[1]; /* Dynamically sized. */
};
#endif /* JEMALLOC_H_STRUCTS */
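
To make the sizing concrete: with LG_SIZEOF_PTR == 3, RTREE_NODESIZE is one cacheline (64 bytes), so each node holds 8 slots and resolves 3 key bits per level. A back-of-the-envelope sketch of the resulting height, assuming 64-bit pointers and 4 MiB (2^22) chunks (the chunk size is an assumption, not taken from this diff):

#include <stdio.h>

int
main(void)
{
	unsigned key_bits = 64 - 22;	/* pointer bits minus lg(chunksize) */
	unsigned bits_per_level = 3;	/* 64-byte node / 8-byte slot = 2^3 slots */
	unsigned height = (key_bits + bits_per_level - 1) / bits_per_level;

	printf("rtree height: %u levels\n", height); /* prints 14 */
	return (0);
}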
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
rtree_t *rtree_new(unsigned bits);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#ifndef JEMALLOC_ENABLE_INLINE
#ifndef JEMALLOC_DEBUG
void *rtree_get_locked(rtree_t *rtree, uintptr_t key);
#endif
void *rtree_get(rtree_t *rtree, uintptr_t key);
bool rtree_set(rtree_t *rtree, uintptr_t key, void *val);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(RTREE_C_))
#define RTREE_GET_GENERATE(f) \
/* The least significant bits of the key are ignored. */ \
JEMALLOC_INLINE void * \
f(rtree_t *rtree, uintptr_t key) \
{ \
void *ret; \
uintptr_t subkey; \
unsigned i, lshift, height, bits; \
void **node, **child; \
\
RTREE_LOCK(&rtree->mutex); \
for (i = lshift = 0, height = rtree->height, node = rtree->root;\
i < height - 1; \
i++, lshift += bits, node = child) { \
bits = rtree->level2bits[i]; \
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \
3)) - bits); \
child = (void**)node[subkey]; \
if (child == NULL) { \
RTREE_UNLOCK(&rtree->mutex); \
return (NULL); \
} \
} \
\
/* \
* node is a leaf, so it contains values rather than node \
* pointers. \
*/ \
bits = rtree->level2bits[i]; \
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \
bits); \
ret = node[subkey]; \
RTREE_UNLOCK(&rtree->mutex); \
\
RTREE_GET_VALIDATE \
return (ret); \
}
#ifdef JEMALLOC_DEBUG
# define RTREE_LOCK(l) malloc_mutex_lock(l)
# define RTREE_UNLOCK(l) malloc_mutex_unlock(l)
# define RTREE_GET_VALIDATE
RTREE_GET_GENERATE(rtree_get_locked)
# undef RTREE_LOCK
# undef RTREE_UNLOCK
# undef RTREE_GET_VALIDATE
#endif
#define RTREE_LOCK(l)
#define RTREE_UNLOCK(l)
#ifdef JEMALLOC_DEBUG
/*
* Suppose that it were possible for a jemalloc-allocated chunk to be
* munmap()ped, followed by a different allocator in another thread re-using
* overlapping virtual memory, all without invalidating the cached rtree
* value. The result would be a false positive (the rtree would claim that
* jemalloc owns memory that it had actually discarded). This scenario
* seems impossible, but the following assertion is a prudent sanity check.
*/
# define RTREE_GET_VALIDATE \
assert(rtree_get_locked(rtree, key) == ret);
#else
# define RTREE_GET_VALIDATE
#endif
RTREE_GET_GENERATE(rtree_get)
#undef RTREE_LOCK
#undef RTREE_UNLOCK
#undef RTREE_GET_VALIDATE
JEMALLOC_INLINE bool
rtree_set(rtree_t *rtree, uintptr_t key, void *val)
{
uintptr_t subkey;
unsigned i, lshift, height, bits;
void **node, **child;
malloc_mutex_lock(&rtree->mutex);
for (i = lshift = 0, height = rtree->height, node = rtree->root;
i < height - 1;
i++, lshift += bits, node = child) {
bits = rtree->level2bits[i];
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -
bits);
child = (void**)node[subkey];
if (child == NULL) {
child = (void**)base_alloc(sizeof(void *) <<
rtree->level2bits[i+1]);
if (child == NULL) {
malloc_mutex_unlock(&rtree->mutex);
return (true);
}
memset(child, 0, sizeof(void *) <<
rtree->level2bits[i+1]);
node[subkey] = child;
}
}
/* node is a leaf, so it contains values rather than node pointers. */
bits = rtree->level2bits[i];
subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits);
node[subkey] = val;
malloc_mutex_unlock(&rtree->mutex);
return (false);
}
#endif
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
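
Taken together with ivsalloc()'s rtree_get(), the ownership protocol is simply: register each chunk's base address on allocation, clear it on deallocation. A condensed sketch of how this commit's chunk code drives the tree (error handling elided; `zero` is a local):

/* Sketch: chunk ownership tracking, keyed by chunk base address. */
bool zero = false;
void *chunk = chunk_alloc(chunksize, false, &zero);
rtree_set(chunks_rtree, (uintptr_t)chunk, chunk);	/* now answered as "ours" */
/* ... ivsalloc(ptr) succeeds for any ptr inside this chunk ... */
rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);	/* disown before unmap */
chunk_dealloc(chunk, chunksize);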

View File

@@ -65,8 +65,21 @@ extern ssize_t opt_lg_tcache_maxclass;
extern ssize_t opt_lg_tcache_gc_sweep;
/* Map of thread-specific caches. */
#ifndef NO_TLS
extern __thread tcache_t *tcache_tls
JEMALLOC_ATTR(tls_model("initial-exec"));
# define TCACHE_GET() tcache_tls
# define TCACHE_SET(v) do { \
tcache_tls = (v); \
pthread_setspecific(tcache_tsd, (void *)(v)); \
} while (0)
#else
extern pthread_key_t tcache_tsd;
# define TCACHE_GET() ((tcache_t *)pthread_getspecific(tcache_tsd))
# define TCACHE_SET(v) do { \
pthread_setspecific(tcache_tsd, (void *)(v)); \
} while (0)
#endif
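
Note that the TLS variant of TCACHE_SET() still mirrors the value into tcache_tsd: pthreads only runs a key's destructor at thread exit when the slot holds a non-NULL value, so the pthread_setspecific() call is what guarantees tcache_thread_cleanup() fires even on the TLS fast path. A sketch of the key setup these macros imply (the real call is in tcache.c, outside this excerpt):

/* Sketch: register the destructor that drains a dying thread's cache. */
if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) != 0)
	abort();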
/*
* Number of tcache bins. There are nbins small-object bins, plus 0 or more
@@ -122,14 +135,23 @@ tcache_get(void)
if ((isthreaded & opt_tcache) == false)
return (NULL);
-tcache = tcache_tls;
-if ((uintptr_t)tcache <= (uintptr_t)1) {
+tcache = TCACHE_GET();
+if ((uintptr_t)tcache <= (uintptr_t)2) {
if (tcache == NULL) {
tcache = tcache_create(choose_arena());
if (tcache == NULL)
return (NULL);
-} else
+} else {
if (tcache == (void *)(uintptr_t)1) {
/*
* Make a note that an allocator function was
* called after tcache_thread_cleanup() was
* called.
*/
TCACHE_SET((uintptr_t)2);
}
return (NULL);
}
}
return (tcache);
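
The small-integer sentinels above form a three-state machine: NULL means no cache has been created, 1 means tcache_thread_cleanup() has already run, and 2 records that an allocation happened after cleanup (so cleanup logic must not resurrect the cache). Illustrative labels for the raw values (names are assumptions; the commit stores the bare integers):

/* Hypothetical names for the tcache TSD sentinel states. */
#define TCACHE_STATE_UNINITIALIZED	((tcache_t *)NULL)
#define TCACHE_STATE_CLEANED_UP		((tcache_t *)(uintptr_t)1)
#define TCACHE_STATE_ALLOC_AFTER_CLEANUP ((tcache_t *)(uintptr_t)2)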

View File

@@ -0,0 +1,23 @@
#ifndef JEMALLOC_ZONE
# error "This source file is for zones on Darwin (OS X)."
#endif
/******************************************************************************/
#ifdef JEMALLOC_H_TYPES
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
#ifdef JEMALLOC_H_STRUCTS
#endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/
#ifdef JEMALLOC_H_EXTERNS
malloc_zone_t *create_zone(void);
void szone2ozone(malloc_zone_t *zone);
#endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/
#ifdef JEMALLOC_H_INLINES
#endif /* JEMALLOC_H_INLINES */
/******************************************************************************/
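
A sketch of the expected lifecycle of these two entry points, assuming they follow the usual OS X zone dance: create_zone() builds a malloc_zone_t whose callbacks forward into jemalloc, and szone2ozone() rewires the default scalable zone so foreign free()/realloc() calls are answered correctly. The helper name is hypothetical; the real initialization lives elsewhere in this commit:

#include <malloc/malloc.h>

/* Sketch: install jemalloc as an OS X malloc zone during startup. */
static void
register_jemalloc_zone(void)
{
	malloc_zone_t *zone = create_zone();

	malloc_zone_register(zone);		/* make the zone queryable */
	szone2ozone(malloc_default_zone());	/* interpose on the default zone */
}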

View File

@@ -92,6 +92,34 @@
/* TLS is used to map arenas and magazine caches to threads. */
#undef NO_TLS
/*
* JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside
* within jemalloc-owned chunks before dereferencing them.
*/
#undef JEMALLOC_IVSALLOC
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#undef JEMALLOC_ZONE
#undef JEMALLOC_ZONE_VERSION
/*
* Methods for purging unused pages differ between operating systems.
*
* madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages,
* such that new pages will be demand-zeroed if
* the address region is later touched.
* madvise(..., MADV_FREE) : On FreeBSD, this marks pages as being unused,
* such that they will be discarded rather than
* swapped out.
* msync(..., MS_KILLPAGES) : On Darwin, this behaves similarly to
* madvise(..., MADV_FREE) on FreeBSD.
*/
#undef JEMALLOC_PURGE_MADVISE_DONTNEED
#undef JEMALLOC_PURGE_MADVISE_FREE
#undef JEMALLOC_PURGE_MSYNC_KILLPAGES
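
A compile-time dispatch sketch showing how purging code elsewhere in jemalloc can select among the three primitives (the function name pages_purge is illustrative; only the feature macros come from this file):

#include <sys/mman.h>

/* Sketch: discard the physical pages backing [addr, addr+length). */
static void
pages_purge(void *addr, size_t length)
{
#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED)
	madvise(addr, length, MADV_DONTNEED);	/* Linux: demand-zero on next touch */
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	madvise(addr, length, MADV_FREE);	/* FreeBSD: drop instead of swapping */
#elif defined(JEMALLOC_PURGE_MSYNC_KILLPAGES)
	msync(addr, length, MS_KILLPAGES);	/* Darwin: analogous to MADV_FREE */
#endif
}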
/* sizeof(void *) == 2^LG_SIZEOF_PTR. */
#undef LG_SIZEOF_PTR