From 2dbecf1f6267fae7a161b9c39cfd4d04ce168a29 Mon Sep 17 00:00:00 2001 From: Jason Evans <jasone@canonware.com> Date: Sun, 5 Sep 2010 10:35:13 -0700 Subject: [PATCH] Port to Mac OS X. Add Mac OS X support, based in large part on the OS X support in Mozilla's version of jemalloc. --- jemalloc/COPYING | 1 + jemalloc/INSTALL | 4 + jemalloc/Makefile.in | 45 ++- jemalloc/configure.ac | 170 +++++---- jemalloc/doc/jemalloc.3.in | 31 +- jemalloc/include/jemalloc/internal/chunk.h | 6 +- .../include/jemalloc/internal/chunk_mmap.h | 2 + .../jemalloc/internal/jemalloc_internal.h.in | 137 ++++--- jemalloc/include/jemalloc/internal/mutex.h | 6 + jemalloc/include/jemalloc/internal/rtree.h | 161 ++++++++ jemalloc/include/jemalloc/internal/tcache.h | 28 +- jemalloc/include/jemalloc/internal/zone.h | 23 ++ jemalloc/include/jemalloc/jemalloc_defs.h.in | 28 ++ jemalloc/src/arena.c | 52 ++- jemalloc/src/base.c | 2 +- jemalloc/src/chunk.c | 27 +- jemalloc/src/chunk_mmap.c | 46 ++- jemalloc/src/ctl.c | 14 +- jemalloc/src/huge.c | 4 +- jemalloc/src/jemalloc.c | 90 ++--- jemalloc/src/mutex.c | 4 + jemalloc/src/prof.c | 123 ++++-- jemalloc/src/rtree.c | 42 +++ jemalloc/src/tcache.c | 26 +- jemalloc/src/zone.c | 354 ++++++++++++++++++ jemalloc/test/thread_arena.c | 10 +- 26 files changed, 1146 insertions(+), 290 deletions(-) create mode 100644 jemalloc/include/jemalloc/internal/rtree.h create mode 100644 jemalloc/include/jemalloc/internal/zone.h create mode 100644 jemalloc/src/rtree.c create mode 100644 jemalloc/src/zone.c diff --git a/jemalloc/COPYING b/jemalloc/COPYING index 1baaf50d..10ade120 100644 --- a/jemalloc/COPYING +++ b/jemalloc/COPYING @@ -3,6 +3,7 @@ subject to the following licenses: -------------------------------------------------------------------------------- Copyright (C) 2002-2010 Jason Evans <jasone@canonware.com>. All rights reserved. +Copyright (C) 2007-2010 Mozilla Foundation. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL index eec3b37e..e9a87989 100644 --- a/jemalloc/INSTALL +++ b/jemalloc/INSTALL @@ -31,6 +31,10 @@ any of the following arguments (not a definitive list) to 'configure': becomes <prefix>malloc(). This makes it possible to use jemalloc at the same time as the system allocator. + By default, the prefix is "", except on OS X, where it is "je_". On OS X, + jemalloc overlays the default malloc zone, but makes no attempt to actually + replace the "malloc", "calloc", etc. symbols. + --with-install-suffix=<suffix> Append <suffix> to the base name of all installed files, such that multiple versions of jemalloc can coexist in the same installation directory. For diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in index a7acc969..aa3bf6bd 100644 --- a/jemalloc/Makefile.in +++ b/jemalloc/Makefile.in @@ -28,10 +28,17 @@ LIBS := @LIBS@ RPATH_EXTRA := @RPATH_EXTRA@ ifeq (macho, @abi@) SO := dylib +WL_SONAME := dylib_install_name else SO := so +WL_SONAME := soname endif REV := 0 +ifeq (macho, @abi@) +TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib +else +TEST_LIBRARY_PATH := +endif
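The prefix/zone split described in the INSTALL hunk above determines how applications reach the allocator on OS X. A minimal usage sketch (not part of the diff; assumes a build configured with the OS X default --with-jemalloc-prefix="je_" and an installed <jemalloc/jemalloc.h>):

#include <stdlib.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	/* Explicit jemalloc entry point, via the "je_" prefix. */
	void *a = je_malloc(16);
	/*
	 * Unprefixed system symbol; once the custom zone is registered,
	 * this is also served by jemalloc, via the zone layer rather than
	 * symbol replacement.
	 */
	void *b = malloc(16);

	je_free(a);
	free(b);
	return (0);
}

# Lists of files.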
BINS := @srcroot@bin/pprof @@ -42,15 +49,16 @@ CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/base.c \ @srcroot@src/chunk_mmap.c @srcroot@src/chunk_swap.c @srcroot@src/ckh.c \ @srcroot@src/ctl.c @srcroot@src/extent.c @srcroot@src/hash.c \ @srcroot@src/huge.c @srcroot@src/mb.c @srcroot@src/mutex.c \ - @srcroot@src/prof.c @srcroot@src/stats.c @srcroot@src/tcache.c -DSOS := @objroot@lib/libjemalloc@install_suffix@.so.$(REV) \ - @objroot@lib/libjemalloc@install_suffix@.so \ + @srcroot@src/prof.c @srcroot@src/rtree.c \ + @srcroot@src/stats.c @srcroot@src/tcache.c +ifeq (macho, @abi@) +CSRCS += @srcroot@src/zone.c +endif +DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \ + @objroot@lib/libjemalloc@install_suffix@.$(SO) \ + @objroot@lib/libjemalloc@install_suffix@_pic.a MAN3 := @objroot@doc/jemalloc@install_suffix@.3 -CTESTS := -ifeq (1, @enable_tls@) -CTESTS += @srcroot@test/thread_arena.c -endif +CTESTS := @srcroot@test/thread_arena.c .PHONY: all dist install check clean distclean relclean @@ -67,13 +75,13 @@ all: $(DSOS) $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)" -%.so : %.so.$(REV) +%.$(SO) : %.$(SO).$(REV) @mkdir -p $(@D) ln -sf $(<F) $@ $(@:%.o=%.d)" @objroot@test/%: @objroot@test/%.o \ - @objroot@lib/libjemalloc@install_suffix@.so + @objroot@lib/libjemalloc@install_suffix@.$(SO) @mkdir -p $(@D) - $(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc +ifneq (@RPATH@, ) + $(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ +else + $(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ +endif install_bin: install -d $(BINDIR) @@ -105,8 +117,8 @@ done install_lib: $(DSOS) install -d $(LIBDIR) - install -m 755 @objroot@lib/libjemalloc@install_suffix@.so.$(REV) $(LIBDIR) - ln -sf libjemalloc@install_suffix@.so.$(REV) $(LIBDIR)/libjemalloc@install_suffix@.so + install -m 755 @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR) + ln -sf libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)/libjemalloc@install_suffix@.$(SO) install -m 755 @objroot@lib/libjemalloc@install_suffix@_pic.a $(LIBDIR) install_man: @@ -128,7 +140,7 @@ check: tests for t in $(CTESTS:@srcroot@%.c=@objroot@%); do \ total=`expr $$total + 1`; \ /bin/echo -n "$${t} ... 
"; \ - $${t} @abs_srcroot@ @abs_objroot@ \ + $(TEST_LIBRARY_PATH) $${t} @abs_srcroot@ @abs_objroot@ \ > @objroot@$${t}.out 2>&1; \ if test -e "@srcroot@$${t}.exp"; then \ diff -u @srcroot@$${t}.exp \ @@ -161,8 +173,7 @@ distclean: clean rm -rf @objroot@autom4te.cache rm -f @objroot@config.log rm -f @objroot@config.status - rm -f @objroot@cfghdrs.stamp - rm -f @objroot@cfgoutputs.stamp + rm -f @objroot@config.stamp rm -f @cfghdrs_out@ rm -f @cfgoutputs_out@ diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac index bf165960..21f502c6 100644 --- a/jemalloc/configure.ac +++ b/jemalloc/configure.ac @@ -150,7 +150,7 @@ JE_COMPILABLE([__attribute__ syntax], [attribute]) if test "x${attribute}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ]) - if test "x$GCC" = "xyes" ; then + if test "x$GCC" = "xyes" -a "${abi}" = "xelf"; then JE_CFLAGS_APPEND([-fvisibility=internal]) fi fi @@ -166,17 +166,20 @@ case "${host}" in *-*-darwin*) CFLAGS="$CFLAGS -fno-common -no-cpp-precomp" abi="macho" + AC_DEFINE([JEMALLOC_PURGE_MSYNC_KILLPAGES]) RPATH="" ;; *-*-freebsd*) CFLAGS="$CFLAGS" abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) RPATH="-Wl,-rpath," ;; *-*-linux*) CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED]) RPATH="-Wl,-rpath," ;; *-*-netbsd*) @@ -191,6 +194,7 @@ case "${host}" in [CFLAGS="$CFLAGS"; abi="elf"], [abi="aout"]) AC_MSG_RESULT([$abi]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) RPATH="-Wl,-rpath," ;; *-*-solaris2*) @@ -245,7 +249,11 @@ dnl Do not prefix public APIs by default. AC_ARG_WITH([jemalloc_prefix], [AS_HELP_STRING([--with-jemalloc-prefix=], [Prefix to prepend to all public APIs])], [JEMALLOC_PREFIX="$with_jemalloc_prefix"], - [JEMALLOC_PREFIX=] + [if test "x$abi" != "xmacho" ; then + JEMALLOC_PREFIX="" +else + JEMALLOC_PREFIX="je_" +fi] ) if test "x$JEMALLOC_PREFIX" != "x" ; then AC_DEFINE([JEMALLOC_PREFIX], [ ]) @@ -294,6 +302,7 @@ fi ) if test "x$enable_debug" = "x1" ; then AC_DEFINE([JEMALLOC_DEBUG], [ ]) + AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) fi AC_SUBST([enable_debug]) @@ -379,7 +388,44 @@ else fi, LUNWIND="-lunwind" ) -dnl Finish prof-related definitions below, once TLS configuration is done. +if test "x$enable_prof" = "x1" ; then + LIBS="$LIBS -lm" + AC_DEFINE([JEMALLOC_PROF], [ ]) + if test "x$enable_prof_libunwind" = "x1" ; then + AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"]) + if test "x$LUNWIND" = "x-lunwind" ; then + AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"], + [enable_prof_libunwind="0"]) + else + LIBS="$LIBS $LUNWIND" + fi + if test "x${enable_prof_libunwind}" = "x1" ; then + AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ]) + fi + fi +fi +AC_SUBST([enable_prof]) +if test "x$enable_prof" = "x0" ; then + roff_prof=".\\\" " + roff_no_prof="" +else + roff_prof="" + roff_no_prof=".\\\" " +fi +AC_SUBST([roff_prof]) +AC_SUBST([roff_no_prof]) + +dnl If libunwind isn't enabled, try to use libgcc rather than gcc intrinsics +dnl for backtracing. +if test "x$enable_prof" = "x1" -a "x$enable_prof_libunwind" = "x0" \ + -a "x$GCC" = "xyes" -a "x$enable_prof_libgcc" = "x1" ; then + enable_prof_libgcc="1" + AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"]) + AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"]) + if test "x${enable_prof_libgcc}" = "x1" ; then + AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ]) + fi +fi dnl Enable tiny allocations by default. 
AC_ARG_ENABLE([tiny], @@ -417,7 +463,19 @@ fi ], [enable_tcache="1"] ) -dnl Finish tcache-related definitions below, once TLS configuration is done. +if test "x$enable_tcache" = "x1" ; then + AC_DEFINE([JEMALLOC_TCACHE], [ ]) +fi +AC_SUBST([enable_tcache]) +if test "x$enable_tcache" = "x0" ; then + roff_tcache=".\\\" " + roff_no_tcache="" +else + roff_tcache="" + roff_no_tcache=".\\\" " +fi +AC_SUBST([roff_tcache]) +AC_SUBST([roff_no_tcache]) dnl Do not enable mmap()ped swap files by default. AC_ARG_ENABLE([swap], @@ -650,71 +708,52 @@ fi AC_SUBST([enable_tls]) if test "x${enable_tls}" = "x0" ; then AC_DEFINE_UNQUOTED([NO_TLS], [ ]) - roff_tls=".\\\" " -else - roff_tls="" fi -AC_SUBST([roff_tls]) -dnl Finish tcache-related definitions, now that TLS configuration is done. -if test "x$enable_tls" = "x0" ; then - enable_tcache="0" -fi -if test "x$enable_tcache" = "x1" ; then - AC_DEFINE([JEMALLOC_TCACHE], [ ]) -fi -AC_SUBST([enable_tcache]) -if test "x$enable_tcache" = "x0" ; then - roff_tcache=".\\\" " - roff_no_tcache="" -else - roff_tcache="" - roff_no_tcache=".\\\" " -fi -AC_SUBST([roff_tcache]) -AC_SUBST([roff_no_tcache]) +dnl ============================================================================ +dnl Darwin-related configuration. -dnl Finish prof-related definitions, now that TLS configuration is done. -if test "x$enable_tls" = "x0" ; then - enable_prof="0" -fi -if test "x$enable_prof" = "x1" ; then - LIBS="$LIBS -lm" - AC_DEFINE([JEMALLOC_PROF], [ ]) - if test "x$enable_prof_libunwind" = "x1" ; then - AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"]) - if test "x$LUNWIND" = "x-lunwind" ; then - AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"], - [enable_prof_libunwind="0"]) - else - LIBS="$LIBS $LUNWIND" - fi - if test "x${enable_prof_libunwind}" = "x1" ; then - AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ]) - fi - fi -fi -AC_SUBST([enable_prof]) -if test "x$enable_prof" = "x0" ; then - roff_prof=".\\\" " - roff_no_prof="" -else - roff_prof="" - roff_no_prof=".\\\" " -fi -AC_SUBST([roff_prof]) -AC_SUBST([roff_no_prof]) +if test "x${abi}" = "xmacho" ; then + AC_DEFINE([JEMALLOC_IVSALLOC]) + AC_DEFINE([JEMALLOC_ZONE]) -dnl If libunwind isn't enabled, try to use libgcc rather than gcc intrinsics -dnl for backtracing. -if test "x$enable_prof" = "x1" -a "x$enable_prof_libunwind" = "x0" \ - -a "x$GCC" = "xyes" -a "x$enable_prof_libgcc" = "x1" ; then - enable_prof_libgcc="1" - AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"]) - AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"]) - if test "x${enable_prof_libgcc}" = "x1" ; then - AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ]) - fi + dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6 + dnl releases. malloc_zone_t and malloc_introspection_t have new fields in + dnl 10.6, which is the only source-level indication of the change. 
+ AC_MSG_CHECKING([malloc zone version]) + AC_TRY_COMPILE([#include <stdlib.h> +#include <malloc/malloc.h>], [ + static malloc_zone_t zone; + static struct malloc_introspection_t zone_introspect; + + zone.size = NULL; + zone.malloc = NULL; + zone.calloc = NULL; + zone.valloc = NULL; + zone.free = NULL; + zone.realloc = NULL; + zone.destroy = NULL; + zone.zone_name = "jemalloc_zone"; + zone.batch_malloc = NULL; + zone.batch_free = NULL; + zone.introspect = &zone_introspect; + zone.version = 6; + zone.memalign = NULL; + zone.free_definite_size = NULL; + + zone_introspect.enumerator = NULL; + zone_introspect.good_size = NULL; + zone_introspect.check = NULL; + zone_introspect.print = NULL; + zone_introspect.log = NULL; + zone_introspect.force_lock = NULL; + zone_introspect.force_unlock = NULL; + zone_introspect.statistics = NULL; + zone_introspect.zone_locked = NULL; +], [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [6]) + AC_MSG_RESULT([6])], + [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [3]) + AC_MSG_RESULT([3])]) fi dnl ============================================================================ @@ -773,4 +812,5 @@ AC_MSG_RESULT([swap : ${enable_swap}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) +AC_MSG_RESULT([tls : ${enable_tls}]) AC_MSG_RESULT([===============================================================================]) diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in index d2d5b77f..dfc4d763 100644 --- a/jemalloc/doc/jemalloc.3.in +++ b/jemalloc/doc/jemalloc.3.in @@ -464,7 +464,7 @@ is a single CPU. @roff_swap@This option is enabled by default. .It P The -.Fn malloc_stats_print +.Fn @jemalloc_prefix@malloc_stats_print function is called at program exit via an .Xr atexit 3 function. @@ -626,7 +626,7 @@ round your allocation requests up to the nearest multiple of the cacheline size. .Sh MALLCTL NAMESPACE The following names are defined in the namespace accessible via the -.Fn mallctl* +.Fn @jemalloc_prefix@mallctl* functions. Value types are specified in parentheses, and their readable/writable statuses are encoded as rw, r-, -w, or --. @@ -648,7 +648,7 @@ Return the jemalloc version string. .It Sy "epoch (uint64_t) rw" .Bd -ragged -offset indent -compact If a value is passed in, refresh the data from which the -.Fn mallctl* +.Fn @jemalloc_prefix@mallctl* functions report values, and increment the epoch. Return the current epoch. This is useful for detecting whether another thread caused a refresh. @@ -669,18 +669,17 @@ This is useful for detecting whether another thread caused a refresh. @roff_tcache@find manual flushing useful. .Ed .\"----------------------------------------------------------------------------- -@roff_tls@.It Sy "thread.arena (unsigned) rw" -@roff_tls@.Bd -ragged -offset indent -compact -@roff_tls@Get or set the arena associated with the calling thread. -@roff_tls@The arena index must be less than the maximum number of arenas (see -@roff_tls@the -@roff_tls@.Dq arenas.narenas -@roff_tls@mallctl). -@roff_tls@If the specified arena was not initialized beforehand (see the -@roff_tls@.Dq arenas.initialized -@roff_tls@mallctl), it will be automatically initialized as a side effect of -@roff_tls@calling this interface. -@roff_tls@.Ed +.It Sy "thread.arena (unsigned) rw" +.Bd -ragged -offset indent -compact +Get or set the arena associated with the calling thread. +The arena index must be less than the maximum number of arenas (see the +.Dq arenas.narenas +mallctl). +If the specified arena was not initialized beforehand (see the +.Dq arenas.initialized +mallctl), it will be automatically initialized as a side effect of calling this +interface. +.Ed
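An application-side sketch of this mallctl (not part of the diff; assumes the OS X default "je_" prefix, with an empty prefix the entry point is plain mallctl()):

#include <stdlib.h>
#include <jemalloc/jemalloc.h>

int
main(void)
{
	unsigned old, new = 0;
	size_t oldlen = sizeof(old);

	/* Read the current association, then bind this thread to arena 0. */
	if (je_mallctl("thread.arena", &old, &oldlen, &new, sizeof(new)) != 0)
		return (1);
	return (0);
}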
.\"----------------------------------------------------------------------------- .It Sy "config.debug (bool) r-" .Bd -ragged -offset indent -compact @@ -1442,7 +1441,7 @@ Attempt to read or write void value, or attempt to write read-only value. A memory allocation failure occurred. .It Bq Er EFAULT An interface with side effects failed in some way not directly related to -.Fn mallctl* +.Fn @jemalloc_prefix@mallctl* read/write processing. .El .Sh ENVIRONMENT diff --git a/jemalloc/include/jemalloc/internal/chunk.h b/jemalloc/include/jemalloc/internal/chunk.h index 1f6abf78..d7955298 100644 --- a/jemalloc/include/jemalloc/internal/chunk.h +++ b/jemalloc/include/jemalloc/internal/chunk.h @@ -39,13 +39,17 @@ extern malloc_mutex_t chunks_mtx; extern chunk_stats_t stats_chunks; #endif +#ifdef JEMALLOC_IVSALLOC +extern rtree_t *chunks_rtree; +#endif + extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; extern size_t arena_chunk_header_npages; extern size_t arena_maxclass; /* Max size class for arenas. */ -void *chunk_alloc(size_t size, bool *zero); +void *chunk_alloc(size_t size, bool base, bool *zero); void chunk_dealloc(void *chunk, size_t size); bool chunk_boot(void); diff --git a/jemalloc/include/jemalloc/internal/chunk_mmap.h b/jemalloc/include/jemalloc/internal/chunk_mmap.h index dc52448c..07b50a4d 100644 --- a/jemalloc/include/jemalloc/internal/chunk_mmap.h +++ b/jemalloc/include/jemalloc/internal/chunk_mmap.h @@ -13,6 +13,8 @@ void *chunk_alloc_mmap(size_t size); void *chunk_alloc_mmap_noreserve(size_t size); void chunk_dealloc_mmap(void *chunk, size_t size); +bool chunk_mmap_boot(void); + #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in index 2c3f32f1..a8d27fa7 100644 --- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in +++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in @@ -27,6 +27,13 @@ #define JEMALLOC_MANGLE #include "../jemalloc@install_suffix@.h" +#ifdef JEMALLOC_ZONE +#include <mach/mach_error.h> +#include <mach/mach_init.h> +#include <mach/vm_map.h> +#include <malloc/malloc.h> +#endif + #ifdef JEMALLOC_LAZY_LOCK #include <dlfcn.h> #endif @@ -159,6 +166,16 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) #define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) +#ifdef PAGE_SHIFT +# undef PAGE_SHIFT +#endif +#ifdef PAGE_SIZE +# undef PAGE_SIZE +#endif +#ifdef PAGE_MASK +# undef PAGE_MASK +#endif + #ifdef DYNAMIC_PAGE_SHIFT # define PAGE_SHIFT lg_pagesize # define PAGE_SIZE pagesize @@ -184,9 +201,13 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/base.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/prof.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif #undef JEMALLOC_H_TYPES /******************************************************************************/ @@ -203,9 +224,13 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include
"jemalloc/internal/base.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/prof.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif #undef JEMALLOC_H_STRUCTS /******************************************************************************/ @@ -240,8 +265,19 @@ extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ * Map of pthread_self() --> arenas[???], used for selecting an arena to use * for allocations. */ -extern __thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec")); +extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); +# define ARENA_GET() arenas_tls +# define ARENA_SET(v) do { \ + arenas_tls = (v); \ +} while (0) +#else +extern pthread_key_t arenas_tsd; +# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) +# define ARENA_SET(v) do { \ + pthread_setspecific(arenas_tsd, (void *)(v)); \ +} while (0) #endif + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. @@ -250,9 +286,9 @@ extern arena_t **arenas; extern unsigned narenas; arena_t *arenas_extend(unsigned ind); -#ifndef NO_TLS arena_t *choose_arena_hard(void); -#endif +void jemalloc_prefork(void); +void jemalloc_postfork(void); #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" @@ -265,9 +301,13 @@ arena_t *choose_arena_hard(void); #include "jemalloc/internal/base.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/prof.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif #undef JEMALLOC_H_EXTERNS /******************************************************************************/ @@ -285,11 +325,30 @@ arena_t *choose_arena_hard(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE +size_t pow2_ceil(size_t x); void malloc_write(const char *s); arena_t *choose_arena(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +/* Compute the smallest power of 2 that is >= x. */ +JEMALLOC_INLINE size_t +pow2_ceil(size_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +#if (LG_SIZEOF_PTR == 3) + x |= x >> 32; +#endif + x++; + return (x); +} + /* * Wrapper around malloc_message() that avoids the need for * JEMALLOC_P(malloc_message)(...) throughout the code. @@ -310,76 +369,33 @@ choose_arena(void) { arena_t *ret; - /* - * We can only use TLS if this is a PIC library, since for the static - * library version, libc's malloc is used by TLS allocation, which - * introduces a bootstrapping issue. - */ -#ifndef NO_TLS - ret = arenas_map; + ret = ARENA_GET(); if (ret == NULL) { ret = choose_arena_hard(); assert(ret != NULL); } -#else - if (isthreaded && narenas > 1) { - unsigned long ind; - /* - * Hash pthread_self() to one of the arenas. There is a prime - * number of arenas, so this has a reasonable chance of - * working. Even so, the hashing can be easily thwarted by - * inconvenient pthread_self() values. Without specific - * knowledge of how pthread_self() calculates values, we can't - * easily do much better than this. 
- */ - ind = (unsigned long) pthread_self() % narenas; - - /* - * Optimistially assume that arenas[ind] has been initialized. - * At worst, we find out that some other thread has already - * done so, after acquiring the lock in preparation. Note that - * this lazy locking also has the effect of lazily forcing - * cache coherency; without the lock acquisition, there's no - * guarantee that modification of arenas[ind] by another thread - * would be seen on this CPU for an arbitrary amount of time. - * - * In general, this approach to modifying a synchronized value - * isn't a good idea, but in this case we only ever modify the - * value once, so things work out well. - */ - ret = arenas[ind]; - if (ret == NULL) { - /* - * Avoid races with another thread that may have already - * initialized arenas[ind]. - */ - malloc_mutex_lock(&arenas_lock); - if (arenas[ind] == NULL) - ret = arenas_extend((unsigned)ind); - else - ret = arenas[ind]; - malloc_mutex_unlock(&arenas_lock); - } - } else - ret = arenas[0]; -#endif - - assert(ret != NULL); return (ret); } #endif +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/prof.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif #ifndef JEMALLOC_ENABLE_INLINE void *imalloc(size_t size); void *icalloc(size_t size); void *ipalloc(size_t alignment, size_t size); size_t isalloc(const void *ptr); +# ifdef JEMALLOC_IVSALLOC +size_t ivsalloc(const void *ptr); +# endif void *iralloc(void *ptr, size_t size); void idalloc(void *ptr); #endif @@ -526,6 +542,19 @@ isalloc(const void *ptr) return (ret); } +#ifdef JEMALLOC_IVSALLOC +JEMALLOC_INLINE size_t +ivsalloc(const void *ptr) +{ + + /* Return 0 if ptr is not within a chunk managed by jemalloc. */ + if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL) + return (0); + + return (isalloc(ptr)); +} +#endif + JEMALLOC_INLINE void * iralloc(void *ptr, size_t size) { diff --git a/jemalloc/include/jemalloc/internal/mutex.h b/jemalloc/include/jemalloc/internal/mutex.h index 108bfa8a..81134153 100644 --- a/jemalloc/include/jemalloc/internal/mutex.h +++ b/jemalloc/include/jemalloc/internal/mutex.h @@ -3,6 +3,12 @@ typedef pthread_mutex_t malloc_mutex_t; +#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +#else +# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#endif + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS diff --git a/jemalloc/include/jemalloc/internal/rtree.h b/jemalloc/include/jemalloc/internal/rtree.h new file mode 100644 index 00000000..9d58ebac --- /dev/null +++ b/jemalloc/include/jemalloc/internal/rtree.h @@ -0,0 +1,161 @@ +/* + * This radix tree implementation is tailored to the singular purpose of + * tracking which chunks are currently owned by jemalloc. This functionality + * is mandatory for OS X, where jemalloc must be able to respond to object + * ownership queries. + * + ******************************************************************************* + */ +#ifdef JEMALLOC_H_TYPES + +typedef struct rtree_s rtree_t; + +/* + * Size of each radix tree node (must be a power of 2). This impacts tree + * depth. 
+ */ +#if (LG_SIZEOF_PTR == 2) +# define RTREE_NODESIZE (1U << 14) +#else +# define RTREE_NODESIZE CACHELINE +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct rtree_s { + malloc_mutex_t mutex; + void **root; + unsigned height; + unsigned level2bits[1]; /* Dynamically sized. */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +rtree_t *rtree_new(unsigned bits); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +#ifndef JEMALLOC_DEBUG +void *rtree_get_locked(rtree_t *rtree, uintptr_t key); +#endif +void *rtree_get(rtree_t *rtree, uintptr_t key); +bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(RTREE_C_)) +#define RTREE_GET_GENERATE(f) \ +/* The least significant bits of the key are ignored. */ \ +JEMALLOC_INLINE void * \ +f(rtree_t *rtree, uintptr_t key) \ +{ \ + void *ret; \ + uintptr_t subkey; \ + unsigned i, lshift, height, bits; \ + void **node, **child; \ + \ + RTREE_LOCK(&rtree->mutex); \ + for (i = lshift = 0, height = rtree->height, node = rtree->root;\ + i < height - 1; \ + i++, lshift += bits, node = child) { \ + bits = rtree->level2bits[i]; \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ + 3)) - bits); \ + child = (void**)node[subkey]; \ + if (child == NULL) { \ + RTREE_UNLOCK(&rtree->mutex); \ + return (NULL); \ + } \ + } \ + \ + /* \ + * node is a leaf, so it contains values rather than node \ + * pointers. \ + */ \ + bits = rtree->level2bits[i]; \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ + bits); \ + ret = node[subkey]; \ + RTREE_UNLOCK(&rtree->mutex); \ + \ + RTREE_GET_VALIDATE \ + return (ret); \ +} + +#ifdef JEMALLOC_DEBUG +# define RTREE_LOCK(l) malloc_mutex_lock(l) +# define RTREE_UNLOCK(l) malloc_mutex_unlock(l) +# define RTREE_GET_VALIDATE +RTREE_GET_GENERATE(rtree_get_locked) +# undef RTREE_LOCK +# undef RTREE_UNLOCK +# undef RTREE_GET_VALIDATE +#endif + +#define RTREE_LOCK(l) +#define RTREE_UNLOCK(l) +#ifdef JEMALLOC_DEBUG + /* + * Suppose that it were possible for a jemalloc-allocated chunk to be + * munmap()ped, followed by a different allocator in another thread re-using + * overlapping virtual memory, all without invalidating the cached rtree + * value. The result would be a false positive (the rtree would claim that + * jemalloc owns memory that it had actually discarded). This scenario + * seems impossible, but the following assertion is a prudent sanity check. 
+ */ +# define RTREE_GET_VALIDATE \ + assert(rtree_get_locked(rtree, key) == ret); +#else +# define RTREE_GET_VALIDATE +#endif +RTREE_GET_GENERATE(rtree_get) +#undef RTREE_LOCK +#undef RTREE_UNLOCK +#undef RTREE_GET_VALIDATE + +JEMALLOC_INLINE bool +rtree_set(rtree_t *rtree, uintptr_t key, void *val) +{ + uintptr_t subkey; + unsigned i, lshift, height, bits; + void **node, **child; + + malloc_mutex_lock(&rtree->mutex); + for (i = lshift = 0, height = rtree->height, node = rtree->root; + i < height - 1; + i++, lshift += bits, node = child) { + bits = rtree->level2bits[i]; + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - + bits); + child = (void**)node[subkey]; + if (child == NULL) { + child = (void**)base_alloc(sizeof(void *) << + rtree->level2bits[i+1]); + if (child == NULL) { + malloc_mutex_unlock(&rtree->mutex); + return (true); + } + memset(child, 0, sizeof(void *) << + rtree->level2bits[i+1]); + node[subkey] = child; + } + } + + /* node is a leaf, so it contains values rather than node pointers. */ + bits = rtree->level2bits[i]; + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); + node[subkey] = val; + malloc_mutex_unlock(&rtree->mutex); + + return (false); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h index a8be436d..df302fba 100644 --- a/jemalloc/include/jemalloc/internal/tcache.h +++ b/jemalloc/include/jemalloc/internal/tcache.h @@ -65,8 +65,21 @@ extern ssize_t opt_lg_tcache_maxclass; extern ssize_t opt_lg_tcache_gc_sweep; /* Map of thread-specific caches. */ +#ifndef NO_TLS extern __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); +# define TCACHE_GET() tcache_tls +# define TCACHE_SET(v) do { \ + tcache_tls = (v); \ + pthread_setspecific(tcache_tsd, (void *)(v)); \ +} while (0) +#else +extern pthread_key_t tcache_tsd; +# define TCACHE_GET() ((tcache_t *)pthread_getspecific(tcache_tsd)) +# define TCACHE_SET(v) do { \ + pthread_setspecific(tcache_tsd, (void *)(v)); \ +} while (0) +#endif /* * Number of tcache bins. There are nbins small-object bins, plus 0 or more @@ -122,14 +135,23 @@ tcache_get(void) if ((isthreaded & opt_tcache) == false) return (NULL); - tcache = tcache_tls; - if ((uintptr_t)tcache <= (uintptr_t)1) { + tcache = TCACHE_GET(); + if ((uintptr_t)tcache <= (uintptr_t)2) { if (tcache == NULL) { tcache = tcache_create(choose_arena()); if (tcache == NULL) return (NULL); - } else + } else { + if (tcache == (void *)(uintptr_t)1) { + /* + * Make a note that an allocator function was + * called after the tcache_thread_cleanup() was + * called. + */ + TCACHE_SET((uintptr_t)2); + } return (NULL); + } } return (tcache); diff --git a/jemalloc/include/jemalloc/internal/zone.h b/jemalloc/include/jemalloc/internal/zone.h new file mode 100644 index 00000000..859b529d --- /dev/null +++ b/jemalloc/include/jemalloc/internal/zone.h @@ -0,0 +1,23 @@ +#ifndef JEMALLOC_ZONE +# error "This source file is for zones on Darwin (OS X)." 
+#endif +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +malloc_zone_t *create_zone(void); +void szone2ozone(malloc_zone_t *zone); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/jemalloc/include/jemalloc/jemalloc_defs.h.in index 8b98d670..eed33a64 100644 --- a/jemalloc/include/jemalloc/jemalloc_defs.h.in +++ b/jemalloc/include/jemalloc/jemalloc_defs.h.in @@ -92,6 +92,34 @@ /* TLS is used to map arenas and magazine caches to threads. */ #undef NO_TLS +/* + * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside + * within jemalloc-owned chunks before dereferencing them. + */ +#undef JEMALLOC_IVSALLOC + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +#undef JEMALLOC_ZONE +#undef JEMALLOC_ZONE_VERSION + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched. + * madvise(..., MADV_FREE) : On FreeBSD, this marks pages as being unused, + * such that they will be discarded rather than + * swapped out. + * msync(..., MS_KILLPAGES) : On Darwin, this behaves similarly to + * madvise(..., MADV_FREE) on FreeBSD. + */ +#undef JEMALLOC_PURGE_MADVISE_DONTNEED +#undef JEMALLOC_PURGE_MADVISE_FREE +#undef JEMALLOC_PURGE_MSYNC_KILLPAGES + /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ #undef LG_SIZEOF_PTR diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c index ee859fcb..db3d4010 100644 --- a/jemalloc/src/arena.c +++ b/jemalloc/src/arena.c @@ -181,9 +181,6 @@ static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t size, size_t oldsize); static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize); -#ifdef JEMALLOC_TINY -static size_t pow2_ceil(size_t x); -#endif static bool small_size2bin_init(void); #ifdef JEMALLOC_DEBUG static void small_size2bin_validate(void); @@ -426,7 +423,7 @@ arena_chunk_alloc(arena_t *arena) zero = false; malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc(chunksize, &zero); + chunk = (arena_chunk_t *)chunk_alloc(chunksize, false, &zero); malloc_mutex_lock(&arena->lock); if (chunk == NULL) return (NULL); @@ -606,10 +603,18 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) ql_new(&mapelms); flag_zeroed = -#ifdef JEMALLOC_SWAP +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED + /* + * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous + * mappings, but not for file-backed mappings. + */ +# ifdef JEMALLOC_SWAP swap_enabled ? 
0 : -#endif +# endif CHUNK_MAP_ZEROED; +#else + 0; +#endif /* * If chunk is the spare, temporarily re-allocate it, 1) so that its @@ -649,9 +654,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) /* * Update internal elements in the page map, so * that CHUNK_MAP_ZEROED is properly set. - * madvise(..., MADV_DONTNEED) results in - * zero-filled pages for anonymous mappings, - * but not for file-backed mappings. */ mapelm->bits = (npages << PAGE_SHIFT) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | @@ -715,8 +717,20 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) assert(ndirty >= npages); ndirty -= npages; #endif + +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), (npages << PAGE_SHIFT), MADV_DONTNEED); +#elif defined(JEMALLOC_PURGE_MADVISE_FREE) + madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), + (npages << PAGE_SHIFT), MADV_FREE); +#elif defined(JEMALLOC_PURGE_MSYNC_KILLPAGES) + msync((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), + (npages << PAGE_SHIFT), MS_KILLPAGES); +#else +# error "No method defined for purging unused dirty pages." +#endif + #ifdef JEMALLOC_STATS nmadvise++; #endif @@ -2239,26 +2253,6 @@ arena_new(arena_t *arena, unsigned ind) return (false); } -#ifdef JEMALLOC_TINY -/* Compute the smallest power of 2 that is >= x. */ -static size_t -pow2_ceil(size_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; -#if (SIZEOF_PTR == 8) - x |= x >> 32; -#endif - x++; - return (x); -} -#endif - #ifdef JEMALLOC_DEBUG static void small_size2bin_validate(void) diff --git a/jemalloc/src/base.c b/jemalloc/src/base.c index 605197ea..cc85e849 100644 --- a/jemalloc/src/base.c +++ b/jemalloc/src/base.c @@ -32,7 +32,7 @@ base_pages_alloc(size_t minsize) assert(minsize != 0); csize = CHUNK_CEILING(minsize); zero = false; - base_pages = chunk_alloc(csize, &zero); + base_pages = chunk_alloc(csize, true, &zero); if (base_pages == NULL) return (true); base_next_addr = base_pages; diff --git a/jemalloc/src/chunk.c b/jemalloc/src/chunk.c index e6e3bcd1..5cb99615 100644 --- a/jemalloc/src/chunk.c +++ b/jemalloc/src/chunk.c @@ -14,6 +14,10 @@ malloc_mutex_t chunks_mtx; chunk_stats_t stats_chunks; #endif +#ifdef JEMALLOC_IVSALLOC +rtree_t *chunks_rtree; +#endif + /* Various chunk-related settings. */ size_t chunksize; size_t chunksize_mask; /* (chunksize - 1). */ @@ -30,7 +34,7 @@ size_t arena_maxclass; /* Max size class for arenas. */ * advantage of them if they are returned. */ void * -chunk_alloc(size_t size, bool *zero) +chunk_alloc(size_t size, bool base, bool *zero) { void *ret; @@ -63,6 +67,14 @@ chunk_alloc(size_t size, bool *zero) /* All strategies for allocation failed. */ ret = NULL; RETURN: +#ifdef JEMALLOC_IVSALLOC + if (base == false && ret != NULL) { + if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { + chunk_dealloc(ret, size); + return (NULL); + } + } +#endif #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) if (ret != NULL) { # ifdef JEMALLOC_PROF @@ -104,6 +116,9 @@ chunk_dealloc(void *chunk, size_t size) assert(size != 0); assert((size & chunksize_mask) == 0); +#ifdef JEMALLOC_IVSALLOC + rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); +#endif #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) malloc_mutex_lock(&chunks_mtx); stats_chunks.curchunks -= (size / chunksize); @@ -126,21 +141,27 @@ chunk_boot(void) { /* Set variables according to the value of opt_lg_chunk. 
*/ - chunksize = (1LU << opt_lg_chunk); + chunksize = (ZU(1) << opt_lg_chunk); assert(chunksize >= PAGE_SIZE); chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> PAGE_SHIFT); +#ifdef JEMALLOC_IVSALLOC + chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk); + if (chunks_rtree == NULL) + return (true); +#endif #if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) if (malloc_mutex_init(&chunks_mtx)) return (true); memset(&stats_chunks, 0, sizeof(chunk_stats_t)); #endif - #ifdef JEMALLOC_SWAP if (chunk_swap_boot()) return (true); #endif + if (chunk_mmap_boot()) + return (true); #ifdef JEMALLOC_DSS if (chunk_dss_boot()) return (true); diff --git a/jemalloc/src/chunk_mmap.c b/jemalloc/src/chunk_mmap.c index d9f9e86d..a3d09e9a 100644 --- a/jemalloc/src/chunk_mmap.c +++ b/jemalloc/src/chunk_mmap.c @@ -6,19 +6,22 @@ /* * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and - * potentially avoid some system calls. We can get away without TLS here, - * since the state of mmap_unaligned only affects performance, rather than - * correct function. + * potentially avoid some system calls. */ -static #ifndef NO_TLS - __thread +static __thread bool mmap_unaligned_tls + JEMALLOC_ATTR(tls_model("initial-exec")); +#define MMAP_UNALIGNED_GET() mmap_unaligned_tls +#define MMAP_UNALIGNED_SET(v) do { \ + mmap_unaligned_tls = (v); \ +} while (0) +#else +static pthread_key_t mmap_unaligned_tsd; +#define MMAP_UNALIGNED_GET() ((bool)pthread_getspecific(mmap_unaligned_tsd)) +#define MMAP_UNALIGNED_SET(v) do { \ + pthread_setspecific(mmap_unaligned_tsd, (void *)(v)); \ +} while (0) #endif - bool mmap_unaligned -#ifndef NO_TLS - JEMALLOC_ATTR(tls_model("initial-exec")) -#endif - ; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -128,7 +131,7 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) * method. */ if (unaligned == false) - mmap_unaligned = false; + MMAP_UNALIGNED_SET(false); return (ret); } @@ -166,7 +169,7 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) * fast method next time. */ - if (mmap_unaligned == false) { + if (MMAP_UNALIGNED_GET() == false) { size_t offset; ret = pages_map(NULL, size, noreserve); @@ -175,7 +178,7 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) offset = CHUNK_ADDR2OFFSET(ret); if (offset != 0) { - mmap_unaligned = true; + MMAP_UNALIGNED_SET(true); /* Try to extend chunk boundary. */ if (pages_map((void *)((uintptr_t)ret + size), chunksize - offset, noreserve) == NULL) { @@ -184,7 +187,8 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve) * the reliable-but-expensive method. */ pages_unmap(ret, size); - ret = chunk_alloc_mmap_slow(size, true, noreserve); + ret = chunk_alloc_mmap_slow(size, true, + noreserve); } else { /* Clean up unneeded leading space. 
*/ pages_unmap(ret, chunksize - offset); @@ -216,3 +220,17 @@ chunk_dealloc_mmap(void *chunk, size_t size) pages_unmap(chunk, size); } + +bool +chunk_mmap_boot(void) +{ + +#ifdef NO_TLS + if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) { + malloc_write("<jemalloc>: Error in pthread_key_create()\n"); + return (true); + } +#endif + + return (false); +} diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c index 128883f8..64913067 100644 --- a/jemalloc/src/ctl.c +++ b/jemalloc/src/ctl.c @@ -41,9 +41,7 @@ CTL_PROTO(epoch) #ifdef JEMALLOC_TCACHE CTL_PROTO(tcache_flush) #endif -#ifndef NO_TLS CTL_PROTO(thread_arena) -#endif CTL_PROTO(config_debug) CTL_PROTO(config_dss) CTL_PROTO(config_dynamic_page_shift) @@ -213,11 +211,9 @@ static const ctl_node_t tcache_node[] = { }; #endif -#ifndef NO_TLS static const ctl_node_t thread_node[] = { {NAME("arena"), CTL(thread_arena)} }; -#endif static const ctl_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, @@ -457,9 +453,7 @@ static const ctl_node_t root_node[] = { #ifdef JEMALLOC_TCACHE {NAME("tcache"), CHILD(tcache)}, #endif -#ifndef NO_TLS {NAME("thread"), CHILD(thread)}, -#endif {NAME("config"), CHILD(config)}, {NAME("opt"), CHILD(opt)}, {NAME("arenas"), CHILD(arenas)}, @@ -1040,13 +1034,13 @@ tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, VOID(); - tcache = tcache_tls; + tcache = TCACHE_GET(); if (tcache == NULL) { ret = 0; goto RETURN; } tcache_destroy(tcache); - tcache_tls = NULL; + TCACHE_SET(NULL); ret = 0; RETURN: @@ -1054,7 +1048,6 @@ RETURN: } #endif -#ifndef NO_TLS static int thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1085,14 +1078,13 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Set new arena association. */ - arenas_map = arena; + ARENA_SET(arena); } ret = 0; RETURN: return (ret); } -#endif /******************************************************************************/ diff --git a/jemalloc/src/huge.c b/jemalloc/src/huge.c index 49962ea0..be35d16f 100644 --- a/jemalloc/src/huge.c +++ b/jemalloc/src/huge.c @@ -37,7 +37,7 @@ huge_malloc(size_t size, bool zero) if (node == NULL) return (NULL); - ret = chunk_alloc(csize, &zero); + ret = chunk_alloc(csize, false, &zero); if (ret == NULL) { base_node_dealloc(node); return (NULL); @@ -99,7 +99,7 @@ huge_palloc(size_t alignment, size_t size) return (NULL); zero = false; - ret = chunk_alloc(alloc_size, &zero); + ret = chunk_alloc(alloc_size, false, &zero); if (ret == NULL) { base_node_dealloc(node); return (NULL); diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c index b36590dd..ebce3ca0 100644 --- a/jemalloc/src/jemalloc.c +++ b/jemalloc/src/jemalloc.c @@ -89,12 +89,12 @@ malloc_mutex_t arenas_lock; arena_t **arenas; unsigned narenas; -#ifndef NO_TLS static unsigned next_arena; -#endif #ifndef NO_TLS -__thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec")); +__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); +#else +pthread_key_t arenas_tsd; #endif /* Set to true once the allocator has been initialized. */ static bool malloc_initialized = false; @@ -104,7 +104,7 @@ static bool malloc_initialized = false; static pthread_t malloc_initializer = (unsigned long)0;
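The arenas_tls/arenas_tsd pair above is one instance of the NO_TLS fallback this patch applies throughout: pthread thread-specific data stands in for __thread where compiler TLS is unavailable (notably on OS X). A standalone sketch of the pattern, with hypothetical names (not patch code):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_key_t val_tsd;

/* TSD-based equivalents of reading/writing a __thread variable. */
#define VAL_GET()	((uintptr_t)pthread_getspecific(val_tsd))
#define VAL_SET(v)	pthread_setspecific(val_tsd, (void *)(uintptr_t)(v))

int
main(void)
{

	if (pthread_key_create(&val_tsd, NULL) != 0)
		return (1);
	VAL_SET(42);
	printf("%lu\n", (unsigned long)VAL_GET());
	return (0);
}

/* Used to avoid initialization races.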
*/ -static malloc_mutex_t init_lock = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP; +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #ifdef DYNAMIC_PAGE_SHIFT size_t pagesize; @@ -146,8 +146,6 @@ static void wrtmessage(void *cbopaque, const char *s); static void stats_print_atexit(void); static unsigned malloc_ncpus(void); static bool malloc_init_hard(void); -static void jemalloc_prefork(void); -static void jemalloc_postfork(void); /******************************************************************************/ /* malloc_message() setup. */ @@ -200,7 +198,6 @@ arenas_extend(unsigned ind) return (arenas[0]); } -#ifndef NO_TLS /* * Choose an arena based on a per-thread value (slow-path code only, called * only by choose_arena()). @@ -219,11 +216,10 @@ choose_arena_hard(void) } else ret = arenas[0]; - arenas_map = ret; + ARENA_SET(ret); return (ret); } -#endif static void stats_print_atexit(void) @@ -697,14 +693,12 @@ MALLOC_OUT: return (true); } -#ifndef NO_TLS /* * Assign the initial arena to the initial thread, in order to avoid * spurious creation of an extra arena if the application switches to * threaded mode. */ - arenas_map = arenas[0]; -#endif + ARENA_SET(arenas[0]); malloc_mutex_init(&arenas_lock); @@ -748,35 +742,13 @@ MALLOC_OUT: narenas = 1; } -#ifdef NO_TLS - if (narenas > 1) { - static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19, - 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, - 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, - 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, - 223, 227, 229, 233, 239, 241, 251, 257, 263}; - unsigned nprimes, parenas; - - /* - * Pick a prime number of hash arenas that is more than narenas - * so that direct hashing of pthread_self() pointers tends to - * spread allocations evenly among the arenas. - */ - assert((narenas & 1) == 0); /* narenas must be even. */ - nprimes = (sizeof(primes) >> LG_SIZEOF_INT); - parenas = primes[nprimes - 1]; /* In case not enough primes. */ - for (i = 1; i < nprimes; i++) { - if (primes[i] > narenas) { - parenas = primes[i]; - break; - } - } - narenas = parenas; - } -#endif - -#ifndef NO_TLS next_arena = (narenas > 0) ? 1 : 0; + +#ifdef NO_TLS + if (pthread_key_create(&arenas_tsd, NULL) != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } #endif /* Allocate and initialize arenas. */ @@ -793,11 +765,35 @@ MALLOC_OUT: /* Copy the pointer to the one arena that was already initialized. */ arenas[0] = init_arenas[0]; +#ifdef JEMALLOC_ZONE + /* Register the custom zone. */ + malloc_zone_register(create_zone()); + + /* + * Convert the default szone to an "overlay zone" that is capable of + * deallocating szone-allocated objects, but allocating new objects + * from jemalloc. + */ + szone2ozone(malloc_default_zone()); +#endif + malloc_initialized = true; malloc_mutex_unlock(&init_lock); return (false); } + +#ifdef JEMALLOC_ZONE +JEMALLOC_ATTR(constructor) +void +jemalloc_darwin_init(void) +{ + + if (malloc_init_hard()) + abort(); +} +#endif + /* * End initialization functions. */ @@ -1219,8 +1215,12 @@ JEMALLOC_P(malloc_usable_size)(const void *ptr) { size_t ret; +#ifdef JEMALLOC_IVSALLOC + ret = ivsalloc(ptr); +#else assert(ptr != NULL); ret = isalloc(ptr); +#endif return (ret); } @@ -1298,11 +1298,13 @@ JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, * is threaded here. */ -static void +void jemalloc_prefork(void) { unsigned i; + assert(isthreaded); + /* Acquire all mutexes in a safe order. 
*/ malloc_mutex_lock(&arenas_lock); @@ -1324,11 +1326,13 @@ jemalloc_prefork(void) #endif } -static void +void jemalloc_postfork(void) { unsigned i; + assert(isthreaded); + /* Release all mutexes, now that fork() has completed. */ #ifdef JEMALLOC_SWAP @@ -1349,3 +1353,5 @@ jemalloc_postfork(void) } malloc_mutex_unlock(&arenas_lock); } + +/******************************************************************************/ diff --git a/jemalloc/src/mutex.c b/jemalloc/src/mutex.c index 3b6081a4..337312bd 100644 --- a/jemalloc/src/mutex.c +++ b/jemalloc/src/mutex.c @@ -59,7 +59,11 @@ malloc_mutex_init(malloc_mutex_t *mutex) if (pthread_mutexattr_init(&attr) != 0) return (true); +#ifdef PTHREAD_MUTEX_ADAPTIVE_NP pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); +#else + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT); +#endif if (pthread_mutex_init(mutex, &attr) != 0) { pthread_mutexattr_destroy(&attr); return (true); diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c index 6d6910ed..e70b1325 100644 --- a/jemalloc/src/prof.c +++ b/jemalloc/src/prof.c @@ -45,7 +45,19 @@ static malloc_mutex_t bt2ctx_mtx; * into the associated prof_ctx_t objects, and unlink/free the prof_thr_cnt_t * objects. */ +#ifndef NO_TLS static __thread ckh_t *bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec")); +# define BT2CNT_GET() bt2cnt_tls +# define BT2CNT_SET(v) do { \ + bt2cnt_tls = (v); \ + pthread_setspecific(bt2cnt_tsd, (void *)(v)); \ +} while (0) +#else +# define BT2CNT_GET() ((ckh_t *)pthread_getspecific(bt2cnt_tsd)) +# define BT2CNT_SET(v) do { \ + pthread_setspecific(bt2cnt_tsd, (void *)(v)); \ +} while (0) +#endif /* * Same contents as bt2cnt_tls, but initialized such that the TSD destructor is @@ -57,12 +69,45 @@ static pthread_key_t bt2cnt_tsd; /* (1U << opt_lg_prof_bt_max). */ static unsigned prof_bt_max; -static __thread uint64_t prof_sample_prn_state - JEMALLOC_ATTR(tls_model("initial-exec")); -static __thread uint64_t prof_sample_threshold - JEMALLOC_ATTR(tls_model("initial-exec")); -static __thread uint64_t prof_sample_accum +typedef struct prof_sample_state_s prof_sample_state_t; +struct prof_sample_state_s { + uint64_t prn_state; + uint64_t threshold; + uint64_t accum; +}; + +#ifndef NO_TLS +static __thread prof_sample_state_t prof_sample_state_tls JEMALLOC_ATTR(tls_model("initial-exec")); +# define PROF_SAMPLE_STATE_GET(r) do { \ + r = &prof_sample_state_tls; \ +} while (0) +#else +static pthread_key_t prof_sample_state_tsd; +/* Used only if an OOM error occurs in PROF_SAMPLE_STATE_GET(). */ +prof_sample_state_t prof_sample_state_oom; +# define PROF_SAMPLE_STATE_GET(r) do { \ + r = (prof_sample_state_t *)pthread_getspecific( \ + prof_sample_state_tsd); \ + if (r == NULL) { \ + r = ipalloc(CACHELINE, sizeof(prof_sample_state_t)); \ + if (r == NULL) { \ + malloc_write("<jemalloc>: Error in heap " \ + "profiler: out of memory; subsequent heap " \ + "profiles may be inaccurate\n"); \ + if (opt_abort) \ + abort(); \ + /* Failure is not an option... 
*/ \ + r = &prof_sample_state_oom; \ + } \ + pthread_setspecific(prof_sample_state_tsd, (void *)r); \ + } \ +} while (0) +# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) +# define ARENA_SET(v) do { \ + pthread_setspecific(arenas_tsd, (void *)(v)); \ +} while (0) +#endif static malloc_mutex_t prof_dump_seq_mtx; static uint64_t prof_dump_seq; @@ -116,6 +161,9 @@ static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2); static bool prof_bt_keycomp(const void *k1, const void *k2); static void bt2cnt_thread_cleanup(void *arg); +#ifdef NO_TLS +static void prof_sample_state_thread_cleanup(void *arg); +#endif /******************************************************************************/ @@ -436,7 +484,7 @@ static prof_thr_cnt_t * prof_lookup(prof_bt_t *bt) { prof_thr_cnt_t *ret; - ckh_t *bt2cnt = bt2cnt_tls; + ckh_t *bt2cnt = BT2CNT_GET(); if (bt2cnt == NULL) { /* Initialize an empty cache for this thread. */ @@ -448,8 +496,8 @@ prof_lookup(prof_bt_t *bt) idalloc(bt2cnt); return (NULL); } - bt2cnt_tls = bt2cnt; - pthread_setspecific(bt2cnt_tsd, bt2cnt); + + BT2CNT_SET(bt2cnt); } if (ckh_search(bt2cnt, bt, NULL, (void **)&ret)) { @@ -519,15 +567,17 @@ prof_sample_threshold_update(void) { uint64_t r; double u; + prof_sample_state_t *prof_sample_state; /* * Compute prof_sample_threshold as a geometrically distributed random * variable with mean (2^opt_lg_prof_sample). */ - prn64(r, 53, prof_sample_prn_state, (uint64_t)1125899906842625LLU, - 1058392653243283975); + PROF_SAMPLE_STATE_GET(prof_sample_state); + prn64(r, 53, prof_sample_state->prn_state, + (uint64_t)1125899906842625LLU, 1058392653243283975); u = (double)r * (1.0/9007199254740992.0L); - prof_sample_threshold = (uint64_t)(log(u) / + prof_sample_state->threshold = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) + (uint64_t)1U; } @@ -551,26 +601,31 @@ prof_alloc_prep(size_t size) prof_backtrace(&bt, 2, prof_bt_max); ret = prof_lookup(&bt); } else { - if (prof_sample_threshold == 0) { + prof_sample_state_t *prof_sample_state; + + PROF_SAMPLE_STATE_GET(prof_sample_state); + if (prof_sample_state->threshold == 0) { /* * Initialize. Seed the prng differently for each * thread. */ - prof_sample_prn_state = (uint64_t)(uintptr_t)&size; + prof_sample_state->prn_state = + (uint64_t)(uintptr_t)&size; prof_sample_threshold_update(); } /* * Determine whether to capture a backtrace based on whether * size is enough for prof_accum to reach - * prof_sample_threshold. However, delay updating these + * prof_sample_state->threshold. However, delay updating these * variables until prof_{m,re}alloc(), because we don't know * for sure that the allocation will succeed. * * Use subtraction rather than addition to avoid potential * integer overflow. */ - if (size >= prof_sample_threshold - prof_sample_accum) { + if (size >= prof_sample_state->threshold - + prof_sample_state->accum) { bt_init(&bt, vec); prof_backtrace(&bt, 2, prof_bt_max); ret = prof_lookup(&bt); @@ -621,21 +676,26 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx) static inline void prof_sample_accum_update(size_t size) { + prof_sample_state_t *prof_sample_state; /* Sampling logic is unnecessary if the interval is 1. */ assert(opt_lg_prof_sample != 0);
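Restating the sampling math used by prof_sample_threshold_update() above: the byte threshold is drawn from a geometric distribution with mean 2^opt_lg_prof_sample, given u uniformly distributed in (0, 1). A self-contained restatement with a hypothetical helper name (not patch code):

#include <math.h>
#include <stdint.h>

/*
 * Draw a geometrically distributed sample threshold with mean
 * 2^lg_sample bytes; u must lie in (0, 1).
 */
static uint64_t
sample_threshold(double u, unsigned lg_sample)
{

	return ((uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_sample)))) + 1);
}

/* Take care to avoid integer overflow.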
*/ - if (size >= prof_sample_threshold - prof_sample_accum) { - prof_sample_accum -= (prof_sample_threshold - size); + PROF_SAMPLE_STATE_GET(prof_sample_state); + if (size >= prof_sample_state->threshold - prof_sample_state->accum) { + prof_sample_state->accum -= (prof_sample_state->threshold - + size); /* Compute new prof_sample_threshold. */ prof_sample_threshold_update(); - while (prof_sample_accum >= prof_sample_threshold) { - prof_sample_accum -= prof_sample_threshold; + while (prof_sample_state->accum >= + prof_sample_state->threshold) { + prof_sample_state->accum -= + prof_sample_state->threshold; prof_sample_threshold_update(); } } else - prof_sample_accum += size; + prof_sample_state->accum += size; } void @@ -1244,7 +1304,7 @@ bt2cnt_thread_cleanup(void *arg) { ckh_t *bt2cnt; - bt2cnt = bt2cnt_tls; + bt2cnt = BT2CNT_GET(); if (bt2cnt != NULL) { ql_head(prof_thr_cnt_t) cnts_ql; size_t tabind; @@ -1278,7 +1338,7 @@ bt2cnt_thread_cleanup(void *arg) */ ckh_delete(bt2cnt); idalloc(bt2cnt); - bt2cnt_tls = NULL; + BT2CNT_SET(NULL); /* Delete cnt's. */ while ((cnt = ql_last(&cnts_ql, link)) != NULL) { @@ -1288,6 +1348,17 @@ bt2cnt_thread_cleanup(void *arg) } } +#ifdef NO_TLS +static void +prof_sample_state_thread_cleanup(void *arg) +{ + prof_sample_state_t *prof_sample_state = (prof_sample_state_t *)arg; + + if (prof_sample_state != &prof_sample_state_oom) + idalloc(prof_sample_state); +} +#endif + void prof_boot0(void) { @@ -1332,6 +1403,14 @@ prof_boot1(void) "<jemalloc>: Error in pthread_key_create()\n"); abort(); } +#ifdef NO_TLS + if (pthread_key_create(&prof_sample_state_tsd, + prof_sample_state_thread_cleanup) != 0) { + malloc_write( + "<jemalloc>: Error in pthread_key_create()\n"); + abort(); + } +#endif prof_bt_max = (1U << opt_lg_prof_bt_max); if (malloc_mutex_init(&prof_dump_seq_mtx)) diff --git a/jemalloc/src/rtree.c b/jemalloc/src/rtree.c new file mode 100644 index 00000000..a583751d --- /dev/null +++ b/jemalloc/src/rtree.c @@ -0,0 +1,42 @@ +#define RTREE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +rtree_t * +rtree_new(unsigned bits) +{ + rtree_t *ret; + unsigned bits_per_level, height, i; + + bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; + height = bits / bits_per_level; + if (height * bits_per_level != bits) + height++; + assert(height * bits_per_level >= bits); + + ret = (rtree_t*)base_alloc(sizeof(rtree_t) + (sizeof(unsigned) * + (height - 1))); + if (ret == NULL) + return (NULL); + memset(ret, 0, sizeof(rtree_t) + (sizeof(unsigned) * (height - 1))); + + malloc_mutex_init(&ret->mutex); + ret->height = height; + if (bits_per_level * height > bits) + ret->level2bits[0] = bits % bits_per_level; + else + ret->level2bits[0] = bits_per_level; + for (i = 1; i < height; i++) + ret->level2bits[i] = bits_per_level; + + ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]); + if (ret->root == NULL) { + /* + * We leak the rtree here, since there's no generic base + * deallocation. + */ + return (NULL); + } + memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]); + + return (ret); +}
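Connecting the pieces: chunk_alloc() (chunk.c above) records every non-base chunk in chunks_rtree, chunk_dealloc() removes it, and ivsalloc() consults it to answer the ownership queries that the OS X zone requires. A condensed sketch in terms of these internal names (not a standalone program):

#include "jemalloc/internal/jemalloc_internal.h"

/* Non-NULL iff the enclosing chunk was registered by chunk_alloc(). */
static bool
chunk_owned(const void *ptr)
{

	return (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) !=
	    NULL);
}

diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c index ace24cea..86343835 100644 --- a/jemalloc/src/tcache.c +++ b/jemalloc/src/tcache.c @@ -9,13 +9,15 @@ ssize_t opt_lg_tcache_maxclass = LG_TCACHE_MAXCLASS_DEFAULT; ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; /* Map of thread-specific caches.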
diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c
index ace24cea..86343835 100644
--- a/jemalloc/src/tcache.c
+++ b/jemalloc/src/tcache.c
@@ -9,13 +9,15 @@ ssize_t opt_lg_tcache_maxclass = LG_TCACHE_MAXCLASS_DEFAULT;
 ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
 
 /* Map of thread-specific caches. */
+#ifndef NO_TLS
 __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#endif
 
 /*
  * Same contents as tcache, but initialized such that the TSD destructor is
  * called when a thread exits, so that the cache can be cleaned up.
  */
-static pthread_key_t tcache_tsd;
+pthread_key_t tcache_tsd;
 
 size_t nhbins;
 size_t tcache_maxclass;
@@ -239,8 +241,7 @@ tcache_create(arena_t *arena)
     for (; i < nhbins; i++)
         tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE;
 
-    tcache_tls = tcache;
-    pthread_setspecific(tcache_tsd, tcache);
+    TCACHE_SET(tcache);
 
     return (tcache);
 }
@@ -328,11 +329,24 @@ tcache_thread_cleanup(void *arg)
 {
     tcache_t *tcache = (tcache_t *)arg;
 
-    assert(tcache == tcache_tls);
-    if (tcache != NULL) {
+    if (tcache == (void *)(uintptr_t)1) {
+        /*
+         * The previous time this destructor was called, we set the key
+         * to 1 so that other destructors wouldn't cause re-creation of
+         * the tcache.  This time, do nothing, so that the destructor
+         * will not be called again.
+         */
+    } else if (tcache == (void *)(uintptr_t)2) {
+        /*
+         * Another destructor called an allocator function after this
+         * destructor was called.  Reset tcache to 1 in order to
+         * receive another callback.
+         */
+        TCACHE_SET((uintptr_t)1);
+    } else if (tcache != NULL) {
         assert(tcache != (void *)(uintptr_t)1);
         tcache_destroy(tcache);
-        tcache_tls = (void *)(uintptr_t)1;
+        TCACHE_SET((uintptr_t)1);
     }
 }
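
[Editor's note: the NULL/1/2 protocol above is subtle: POSIX allows TSD destructors to run for multiple rounds at thread exit, and per the comments above, allocator paths that run after teardown set the value to 2. The detached sketch below shows the same state machine with hypothetical names (cache_key, cache_cleanup); it is not the patch's code.]

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

static pthread_key_t cache_key; /* Hypothetical key, for illustration. */

static void
cache_cleanup(void *arg)
{
    uintptr_t state = (uintptr_t)arg;

    if (state == 1) {
        /* Second round with no intervening use: nothing left to do. */
    } else if (state == 2) {
        /*
         * The cache was used again after teardown; storing a non-NULL
         * value makes pthreads run destructors for one more round.
         */
        pthread_setspecific(cache_key, (void *)(uintptr_t)1);
    } else if (arg != NULL) {
        free(arg); /* Real teardown of the per-thread cache. */
        pthread_setspecific(cache_key, (void *)(uintptr_t)1);
    }
}
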
diff --git a/jemalloc/src/zone.c b/jemalloc/src/zone.c
new file mode 100644
index 00000000..2c1b2318
--- /dev/null
+++ b/jemalloc/src/zone.c
@@ -0,0 +1,354 @@
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifndef JEMALLOC_ZONE
+# error "This source file is for zones on Darwin (OS X)."
+#endif
+
+/******************************************************************************/
+/* Data. */
+
+static malloc_zone_t zone, szone;
+static struct malloc_introspection_t zone_introspect, ozone_introspect;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static size_t zone_size(malloc_zone_t *zone, void *ptr);
+static void *zone_malloc(malloc_zone_t *zone, size_t size);
+static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
+static void *zone_valloc(malloc_zone_t *zone, size_t size);
+static void zone_free(malloc_zone_t *zone, void *ptr);
+static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void *zone_memalign(malloc_zone_t *zone, size_t alignment,
+    size_t size);
+static void zone_free_definite_size(malloc_zone_t *zone, void *ptr,
+    size_t size);
+#endif
+static void *zone_destroy(malloc_zone_t *zone);
+static size_t zone_good_size(malloc_zone_t *zone, size_t size);
+static void zone_force_lock(malloc_zone_t *zone);
+static void zone_force_unlock(malloc_zone_t *zone);
+static size_t ozone_size(malloc_zone_t *zone, void *ptr);
+static void ozone_free(malloc_zone_t *zone, void *ptr);
+static void *ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
+static unsigned ozone_batch_malloc(malloc_zone_t *zone, size_t size,
+    void **results, unsigned num_requested);
+static void ozone_batch_free(malloc_zone_t *zone, void **to_be_freed,
+    unsigned num);
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void ozone_free_definite_size(malloc_zone_t *zone, void *ptr,
+    size_t size);
+#endif
+static void ozone_force_lock(malloc_zone_t *zone);
+static void ozone_force_unlock(malloc_zone_t *zone);
+
+/******************************************************************************/
+/*
+ * Functions.
+ */
+
+static size_t
+zone_size(malloc_zone_t *zone, void *ptr)
+{
+
+    /*
+     * There appear to be places within Darwin (such as setenv(3)) that
+     * cause calls to this function with pointers that *no* zone owns.  If
+     * we knew that all pointers were owned by *some* zone, we could split
+     * our zone into two parts, and use one as the default allocator and
+     * the other as the default deallocator/reallocator.  Since that will
+     * not work in practice, we must check all pointers to ensure that they
+     * reside within a mapped chunk before determining size.
+     */
+    return (ivsalloc(ptr));
+}
+
+static void *
+zone_malloc(malloc_zone_t *zone, size_t size)
+{
+
+    return (JEMALLOC_P(malloc)(size));
+}
+
+static void *
+zone_calloc(malloc_zone_t *zone, size_t num, size_t size)
+{
+
+    return (JEMALLOC_P(calloc)(num, size));
+}
+
+static void *
+zone_valloc(malloc_zone_t *zone, size_t size)
+{
+    void *ret = NULL; /* Assignment avoids useless compiler warning. */
+
+    JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
+
+    return (ret);
+}
+
+static void
+zone_free(malloc_zone_t *zone, void *ptr)
+{
+
+    JEMALLOC_P(free)(ptr);
+}
+
+static void *
+zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+    return (JEMALLOC_P(realloc)(ptr, size));
+}
+
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void *
+zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
+{
+    void *ret = NULL; /* Assignment avoids useless compiler warning. */
+
+    JEMALLOC_P(posix_memalign)(&ret, alignment, size);
+
+    return (ret);
+}
+
+static void
+zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+    assert(ivsalloc(ptr) == size);
+    JEMALLOC_P(free)(ptr);
+}
+#endif
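
[Editor's note: the zone_size() comment above is the crux of the zone protocol. Darwin's malloc layer decides which zone owns a pointer by asking each registered zone for the pointer's size, roughly as in the sketch below. This is illustrative only; find_owner() is a hypothetical stand-in for libSystem's internal lookup, not a real API.]

#include <malloc/malloc.h>
#include <stddef.h>

/*
 * Illustrative only: walk a set of zones and report the first one whose
 * size() callback claims the pointer.  This is why zone_size() above must
 * return 0 for pointers jemalloc does not own.
 */
static size_t
find_owner(malloc_zone_t **zones, unsigned nzones, void *ptr,
    malloc_zone_t **owner)
{
    unsigned i;

    for (i = 0; i < nzones; i++) {
        size_t size = zones[i]->size(zones[i], ptr);

        if (size != 0) {
            *owner = zones[i];
            return (size);
        }
    }
    *owner = NULL;
    return (0);
}
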
+static void *
+zone_destroy(malloc_zone_t *zone)
+{
+
+    /* This function should never be called. */
+    assert(false);
+    return (NULL);
+}
+
+static size_t
+zone_good_size(malloc_zone_t *zone, size_t size)
+{
+    size_t ret;
+    void *p;
+
+    /*
+     * Actually create an object of the appropriate size, then find out
+     * how large it could have been without moving up to the next size
+     * class.
+     */
+    p = JEMALLOC_P(malloc)(size);
+    if (p != NULL) {
+        ret = isalloc(p);
+        JEMALLOC_P(free)(p);
+    } else
+        ret = size;
+
+    return (ret);
+}
+
+static void
+zone_force_lock(malloc_zone_t *zone)
+{
+
+    if (isthreaded)
+        jemalloc_prefork();
+}
+
+static void
+zone_force_unlock(malloc_zone_t *zone)
+{
+
+    if (isthreaded)
+        jemalloc_postfork();
+}
+
+malloc_zone_t *
+create_zone(void)
+{
+
+    zone.size = (void *)zone_size;
+    zone.malloc = (void *)zone_malloc;
+    zone.calloc = (void *)zone_calloc;
+    zone.valloc = (void *)zone_valloc;
+    zone.free = (void *)zone_free;
+    zone.realloc = (void *)zone_realloc;
+    zone.destroy = (void *)zone_destroy;
+    zone.zone_name = "jemalloc_zone";
+    zone.batch_malloc = NULL;
+    zone.batch_free = NULL;
+    zone.introspect = &zone_introspect;
+    zone.version = JEMALLOC_ZONE_VERSION;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+    zone.memalign = zone_memalign;
+    zone.free_definite_size = zone_free_definite_size;
+#endif
+
+    zone_introspect.enumerator = NULL;
+    zone_introspect.good_size = (void *)zone_good_size;
+    zone_introspect.check = NULL;
+    zone_introspect.print = NULL;
+    zone_introspect.log = NULL;
+    zone_introspect.force_lock = (void *)zone_force_lock;
+    zone_introspect.force_unlock = (void *)zone_force_unlock;
+    zone_introspect.statistics = NULL;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+    zone_introspect.zone_locked = NULL;
+#endif
+
+    return (&zone);
+}
+
+static size_t
+ozone_size(malloc_zone_t *zone, void *ptr)
+{
+    size_t ret;
+
+    ret = ivsalloc(ptr);
+    if (ret == 0)
+        ret = szone.size(zone, ptr);
+
+    return (ret);
+}
+
+static void
+ozone_free(malloc_zone_t *zone, void *ptr)
+{
+
+    if (ivsalloc(ptr) != 0)
+        JEMALLOC_P(free)(ptr);
+    else {
+        size_t size = szone.size(zone, ptr);
+        if (size != 0)
+            (szone.free)(zone, ptr);
+    }
+}
+
+static void *
+ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+    size_t oldsize;
+
+    if (ptr == NULL)
+        return (JEMALLOC_P(malloc)(size));
+
+    oldsize = ivsalloc(ptr);
+    if (oldsize != 0)
+        return (JEMALLOC_P(realloc)(ptr, size));
+    else {
+        oldsize = szone.size(zone, ptr);
+        if (oldsize == 0)
+            return (JEMALLOC_P(malloc)(size));
+        else {
+            void *ret = JEMALLOC_P(malloc)(size);
+            if (ret != NULL) {
+                memcpy(ret, ptr, (oldsize < size) ? oldsize :
+                    size);
+                (szone.free)(zone, ptr);
+            }
+            return (ret);
+        }
+    }
+}
+
+static unsigned
+ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results,
+    unsigned num_requested)
+{
+
+    /* Don't bother implementing this interface, since it isn't required. */
+    return (0);
+}
+
+static void
+ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num)
+{
+    unsigned i;
+
+    for (i = 0; i < num; i++)
+        ozone_free(zone, to_be_freed[i]);
+}
+
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void
+ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+    if (ivsalloc(ptr) != 0) {
+        assert(ivsalloc(ptr) == size);
+        JEMALLOC_P(free)(ptr);
+    } else {
+        assert(size == szone.size(zone, ptr));
+        szone.free_definite_size(zone, ptr, size);
+    }
+}
+#endif
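
[Editor's note: create_zone() only builds the zone; something must hand it to the system. A minimal registration path might look like the sketch below. malloc_zone_register() is the real libSystem call, but register_jemalloc_zone() is a hypothetical boot function for illustration; the patch wires registration up elsewhere, in jemalloc.c.]

#include <malloc/malloc.h>

malloc_zone_t *create_zone(void); /* From src/zone.c above. */

/* Illustrative sketch: make the jemalloc zone visible to Darwin's malloc. */
static void
register_jemalloc_zone(void)
{

    malloc_zone_register(create_zone());
}
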
+static void
+ozone_force_lock(malloc_zone_t *zone)
+{
+
+    /* jemalloc locking is taken care of by the normal jemalloc zone. */
+    szone.introspect->force_lock(zone);
+}
+
+static void
+ozone_force_unlock(malloc_zone_t *zone)
+{
+
+    /* jemalloc locking is taken care of by the normal jemalloc zone. */
+    szone.introspect->force_unlock(zone);
+}
+
+/*
+ * Overlay the default scalable zone (szone) such that existing allocations are
+ * drained, and further allocations come from jemalloc.  This is necessary
+ * because Core Foundation directly accesses and uses the szone before the
+ * jemalloc library is even loaded.
+ */
+void
+szone2ozone(malloc_zone_t *zone)
+{
+
+    /*
+     * Stash a copy of the original szone so that we can call its
+     * functions as needed.  Note that internally, the szone stores its
+     * bookkeeping data structures immediately following the malloc_zone_t
+     * header, so when calling szone functions, we need to pass a pointer
+     * to the original zone structure.
+     */
+    memcpy(&szone, zone, sizeof(malloc_zone_t));
+
+    zone->size = (void *)ozone_size;
+    zone->malloc = (void *)zone_malloc;
+    zone->calloc = (void *)zone_calloc;
+    zone->valloc = (void *)zone_valloc;
+    zone->free = (void *)ozone_free;
+    zone->realloc = (void *)ozone_realloc;
+    zone->destroy = (void *)zone_destroy;
+    zone->zone_name = "jemalloc_ozone";
+    zone->batch_malloc = ozone_batch_malloc;
+    zone->batch_free = ozone_batch_free;
+    zone->introspect = &ozone_introspect;
+    zone->version = JEMALLOC_ZONE_VERSION;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+    zone->memalign = zone_memalign;
+    zone->free_definite_size = ozone_free_definite_size;
+#endif
+
+    ozone_introspect.enumerator = NULL;
+    ozone_introspect.good_size = (void *)zone_good_size;
+    ozone_introspect.check = NULL;
+    ozone_introspect.print = NULL;
+    ozone_introspect.log = NULL;
+    ozone_introspect.force_lock = (void *)ozone_force_lock;
+    ozone_introspect.force_unlock = (void *)ozone_force_unlock;
+    ozone_introspect.statistics = NULL;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+    ozone_introspect.zone_locked = NULL;
+#endif
+}
diff --git a/jemalloc/test/thread_arena.c b/jemalloc/test/thread_arena.c
index 99e9669a..d52435fa 100644
--- a/jemalloc/test/thread_arena.c
+++ b/jemalloc/test/thread_arena.c
@@ -3,6 +3,7 @@
 #include <pthread.h>
 #include <string.h>
 
+#define JEMALLOC_MANGLE
 #include "jemalloc/jemalloc.h"
 
 void *
@@ -13,10 +14,10 @@ thread_start(void *arg)
     size_t size;
     int err;
 
-    malloc(1);
+    JEMALLOC_P(malloc)(1);
 
     size = sizeof(arena_ind);
-    if ((err = mallctl("thread.arena", &arena_ind, &size,
+    if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size,
         &main_arena_ind, sizeof(main_arena_ind)))) {
         fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
             strerror(err));
@@ -37,10 +38,11 @@ main(void)
 
     fprintf(stderr, "Test begin\n");
 
-    malloc(1);
+    JEMALLOC_P(malloc)(1);
 
     size = sizeof(arena_ind);
-    if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) {
+    if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL,
+        0))) {
         fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
             strerror(err));
         ret = 1;
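
[Editor's note: the test changes above are mechanical. With JEMALLOC_MANGLE defined before including jemalloc/jemalloc.h, JEMALLOC_P(name) expands to the configured prefixed symbol, so the test exercises jemalloc even when the build leaves the standard malloc symbols untouched. The minimal program below is an illustrative sketch, not part of the patch.]

#define JEMALLOC_MANGLE
#include "jemalloc/jemalloc.h"

int
main(void)
{
    /* Calls the prefixed allocator (e.g. je_malloc) directly. */
    void *p = JEMALLOC_P(malloc)(64);

    if (p == NULL)
        return (1);
    JEMALLOC_P(free)(p);
    return (0);
}
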