Implement per thread heap profiling.

Rename data structures (prof_thr_cnt_t-->prof_tctx_t,
prof_ctx_t-->prof_gctx_t), and convert to storing a prof_tctx_t for
sampled objects.

Convert PROF_ALLOC_PREP() to prof_alloc_prep(), since precise backtrace
depth within jemalloc functions is no longer an issue (pprof prunes
irrelevant frames).

Implement mallctls:
- prof.reset implements full sample data reset, and optional change of
  sample interval.
- prof.lg_sample reads the current sample interval (opt.lg_prof_sample
  was the permanent source of truth prior to prof.reset).
- thread.prof.name provides naming capability for threads within heap
  profile dumps.
- thread.prof.active makes it possible to activate/deactivate heap
  profiling for individual threads.

Modify the heap dump files to contain per thread heap profile data.
This change is incompatible with the existing pprof, which will require
enhancements to read and process the enriched data.
Jason Evans 2014-08-18 16:22:13 -07:00
parent 1628e8615e
commit 602c8e0971
11 changed files with 1235 additions and 724 deletions


@ -1047,7 +1047,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<varlistentry id="opt.lg_prof_sample"> <varlistentry id="opt.lg_prof_sample">
<term> <term>
<mallctl>opt.lg_prof_sample</mallctl> <mallctl>opt.lg_prof_sample</mallctl>
(<type>ssize_t</type>) (<type>size_t</type>)
<literal>r-</literal> <literal>r-</literal>
[<option>--enable-prof</option>] [<option>--enable-prof</option>]
</term> </term>
@ -1243,6 +1243,35 @@ malloc_conf = "xmalloc:true";]]></programlisting>
the developer may find manual flushing useful.</para></listitem> the developer may find manual flushing useful.</para></listitem>
</varlistentry> </varlistentry>
<varlistentry id="thread.prof.name">
<term>
<mallctl>thread.prof.name</mallctl>
(<type>const char *</type>)
<literal>rw</literal>
[<option>--enable-prof</option>]
</term>
<listitem><para>Get/set the descriptive name associated with the calling
thread in memory profile dumps. An internal copy of the name string is
created, so the input string need not be maintained after this interface
completes execution. The output string of this interface should be
copied for non-ephemeral uses, because multiple implementation details
can cause asynchronous string deallocation.</para></listitem>
</varlistentry>
<varlistentry id="thread.prof.active">
<term>
<mallctl>thread.prof.active</mallctl>
(<type>bool</type>)
<literal>rw</literal>
[<option>--enable-prof</option>]
</term>
<listitem><para>Control whether sampling is currently active for the
calling thread. This is a deactivation mechanism in addition to <link
linkend="prof.active"><mallctl>prof.active</mallctl></link>; both must
be active for the calling thread to sample. This flag is enabled by
default.</para></listitem>
</varlistentry>
<varlistentry id="arena.i.purge"> <varlistentry id="arena.i.purge">
<term> <term>
<mallctl>arena.&lt;i&gt;.purge</mallctl> <mallctl>arena.&lt;i&gt;.purge</mallctl>
@ -1492,6 +1521,31 @@ malloc_conf = "xmalloc:true";]]></programlisting>
option.</para></listitem> option.</para></listitem>
</varlistentry> </varlistentry>
<varlistentry id="prof.reset">
<term>
<mallctl>prof.reset</mallctl>
(<type>size_t</type>)
<literal>-w</literal>
[<option>--enable-prof</option>]
</term>
<listitem><para>Reset all memory profile statistics, and optionally
update the sample rate (see <link
linkend="opt.lg_prof_sample"><mallctl>opt.lg_prof_sample</mallctl></link>).
</para></listitem>
</varlistentry>
<varlistentry id="prof.lg_sample">
<term>
<mallctl>prof.lg_sample</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
[<option>--enable-prof</option>]
</term>
<listitem><para>Get the sample rate (see <link
linkend="opt.lg_prof_sample"><mallctl>opt.lg_prof_sample</mallctl></link>).
</para></listitem>
</varlistentry>
<varlistentry id="prof.interval"> <varlistentry id="prof.interval">
<term> <term>
<mallctl>prof.interval</mallctl> <mallctl>prof.interval</mallctl>


@ -58,7 +58,7 @@ typedef struct arena_s arena_t;
struct arena_chunk_map_s { struct arena_chunk_map_s {
#ifndef JEMALLOC_PROF #ifndef JEMALLOC_PROF
/* /*
* Overlay prof_ctx in order to allow it to be referenced by dead code. * Overlay prof_tctx in order to allow it to be referenced by dead code.
* Such antics aren't warranted for per arena data structures, but * Such antics aren't warranted for per arena data structures, but
* chunk map overhead accounts for a percentage of memory, rather than * chunk map overhead accounts for a percentage of memory, rather than
* being just a fixed cost. * being just a fixed cost.
@ -75,7 +75,7 @@ struct arena_chunk_map_s {
rb_node(arena_chunk_map_t) rb_link; rb_node(arena_chunk_map_t) rb_link;
/* Profile counters, used for large object runs. */ /* Profile counters, used for large object runs. */
prof_ctx_t *prof_ctx; prof_tctx_t *prof_tctx;
#ifndef JEMALLOC_PROF #ifndef JEMALLOC_PROF
}; /* union { ... }; */ }; /* union { ... }; */
#endif #endif
@ -472,8 +472,8 @@ size_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); size_t arena_bin_index(arena_t *arena, arena_bin_t *bin);
unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
const void *ptr); const void *ptr);
prof_ctx_t *arena_prof_ctx_get(const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr);
void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache); void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
size_t arena_salloc(const void *ptr, bool demote); size_t arena_salloc(const void *ptr, bool demote);
void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache); void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache);
@ -987,10 +987,10 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
return (regind); return (regind);
} }
JEMALLOC_INLINE prof_ctx_t * JEMALLOC_INLINE prof_tctx_t *
arena_prof_ctx_get(const void *ptr) arena_prof_tctx_get(const void *ptr)
{ {
prof_ctx_t *ret; prof_tctx_t *ret;
arena_chunk_t *chunk; arena_chunk_t *chunk;
size_t pageind, mapbits; size_t pageind, mapbits;
@ -1003,15 +1003,15 @@ arena_prof_ctx_get(const void *ptr)
mapbits = arena_mapbits_get(chunk, pageind); mapbits = arena_mapbits_get(chunk, pageind);
assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
if ((mapbits & CHUNK_MAP_LARGE) == 0) if ((mapbits & CHUNK_MAP_LARGE) == 0)
ret = (prof_ctx_t *)(uintptr_t)1U; ret = (prof_tctx_t *)(uintptr_t)1U;
else else
ret = arena_mapp_get(chunk, pageind)->prof_ctx; ret = arena_mapp_get(chunk, pageind)->prof_tctx;
return (ret); return (ret);
} }
JEMALLOC_INLINE void JEMALLOC_INLINE void
arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
{ {
arena_chunk_t *chunk; arena_chunk_t *chunk;
size_t pageind; size_t pageind;
@ -1025,7 +1025,7 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
assert(arena_mapbits_allocated_get(chunk, pageind) != 0); assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
if (arena_mapbits_large_get(chunk, pageind) != 0) if (arena_mapbits_large_get(chunk, pageind) != 0)
arena_mapp_get(chunk, pageind)->prof_ctx = ctx; arena_mapp_get(chunk, pageind)->prof_tctx = tctx;
} }
JEMALLOC_ALWAYS_INLINE void * JEMALLOC_ALWAYS_INLINE void *


@ -16,7 +16,7 @@ struct extent_node_s {
rb_node(extent_node_t) link_ad; rb_node(extent_node_t) link_ad;
/* Profile counters, used for huge objects. */ /* Profile counters, used for huge objects. */
prof_ctx_t *prof_ctx; prof_tctx_t *prof_tctx;
/* Pointer to the extent that this tree node is responsible for. */ /* Pointer to the extent that this tree node is responsible for. */
void *addr; void *addr;


@ -21,8 +21,8 @@ extern huge_dalloc_junk_t *huge_dalloc_junk;
#endif #endif
void huge_dalloc(void *ptr); void huge_dalloc(void *ptr);
size_t huge_salloc(const void *ptr); size_t huge_salloc(const void *ptr);
prof_ctx_t *huge_prof_ctx_get(const void *ptr); prof_tctx_t *huge_prof_tctx_get(const void *ptr);
void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
bool huge_boot(void); bool huge_boot(void);
void huge_prefork(void); void huge_prefork(void);
void huge_postfork_parent(void); void huge_postfork_parent(void);


@ -48,9 +48,9 @@ arena_prefork
arena_prof_accum arena_prof_accum
arena_prof_accum_impl arena_prof_accum_impl
arena_prof_accum_locked arena_prof_accum_locked
arena_prof_ctx_get
arena_prof_ctx_set
arena_prof_promoted arena_prof_promoted
arena_prof_tctx_get
arena_prof_tctx_set
arena_ptr_small_binind_get arena_ptr_small_binind_get
arena_purge_all arena_purge_all
arena_quarantine_junk_small arena_quarantine_junk_small
@ -208,8 +208,8 @@ huge_palloc
huge_postfork_child huge_postfork_child
huge_postfork_parent huge_postfork_parent
huge_prefork huge_prefork
huge_prof_ctx_get huge_prof_tctx_get
huge_prof_ctx_set huge_prof_tctx_set
huge_ralloc huge_ralloc
huge_ralloc_no_move huge_ralloc_no_move
huge_salloc huge_salloc
@ -287,28 +287,31 @@ opt_zero
p2rz p2rz
pages_purge pages_purge
pow2_ceil pow2_ceil
prof_alloc_prep
prof_backtrace prof_backtrace
prof_boot0 prof_boot0
prof_boot1 prof_boot1
prof_boot2 prof_boot2
prof_bt_count prof_bt_count
prof_ctx_get
prof_ctx_set
prof_dump_open prof_dump_open
prof_free prof_free
prof_free_sampled_object
prof_gdump prof_gdump
prof_idump prof_idump
prof_interval prof_interval
prof_lookup prof_lookup
prof_malloc prof_malloc
prof_malloc_record_object prof_malloc_sample_object
prof_mdump prof_mdump
prof_postfork_child prof_postfork_child
prof_postfork_parent prof_postfork_parent
prof_prefork prof_prefork
prof_realloc prof_realloc
prof_reset
prof_sample_accum_update prof_sample_accum_update
prof_sample_threshold_update prof_sample_threshold_update
prof_tctx_get
prof_tctx_set
prof_tdata_booted prof_tdata_booted
prof_tdata_cleanup prof_tdata_cleanup
prof_tdata_get prof_tdata_get
@ -322,6 +325,10 @@ prof_tdata_tsd_get
prof_tdata_tsd_get_wrapper prof_tdata_tsd_get_wrapper
prof_tdata_tsd_init_head prof_tdata_tsd_init_head
prof_tdata_tsd_set prof_tdata_tsd_set
prof_thread_active_get
prof_thread_active_set
prof_thread_name_get
prof_thread_name_set
quarantine quarantine
quarantine_alloc_hook quarantine_alloc_hook
quarantine_boot quarantine_boot


@ -1,11 +1,10 @@
/******************************************************************************/ /******************************************************************************/
#ifdef JEMALLOC_H_TYPES #ifdef JEMALLOC_H_TYPES
typedef uint64_t prof_thr_uid_t;
typedef struct prof_bt_s prof_bt_t; typedef struct prof_bt_s prof_bt_t;
typedef struct prof_cnt_s prof_cnt_t; typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_thr_cnt_s prof_thr_cnt_t; typedef struct prof_tctx_s prof_tctx_t;
typedef struct prof_ctx_s prof_ctx_t; typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t; typedef struct prof_tdata_s prof_tdata_t;
/* Option defaults. */ /* Option defaults. */
@ -34,11 +33,17 @@ typedef struct prof_tdata_s prof_tdata_t;
#define PROF_PRINTF_BUFSIZE 128 #define PROF_PRINTF_BUFSIZE 128
/* /*
* Number of mutexes shared among all ctx's. No space is allocated for these * Number of mutexes shared among all gctx's. No space is allocated for these
* unless profiling is enabled, so it's okay to over-provision. * unless profiling is enabled, so it's okay to over-provision.
*/ */
#define PROF_NCTX_LOCKS 1024 #define PROF_NCTX_LOCKS 1024
/*
* Number of mutexes shared among all tdata's. No space is allocated for these
* unless profiling is enabled, so it's okay to over-provision.
*/
#define PROF_NTDATA_LOCKS 256
/* /*
* prof_tdata pointers close to NULL are used to encode state information that * prof_tdata pointers close to NULL are used to encode state information that
* is used for cleaning up during thread shutdown. * is used for cleaning up during thread shutdown.
@ -66,87 +71,70 @@ typedef struct {
#endif #endif
struct prof_cnt_s { struct prof_cnt_s {
/* /* Profiling counters. */
* Profiling counters. An allocation/deallocation pair can operate on uint64_t curobjs;
* different prof_thr_cnt_t objects that are linked into the same uint64_t curbytes;
* prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
* negative. In principle it is possible for the *bytes counters to
* overflow/underflow, but a general solution would require something
* like 128-bit counters; this implementation doesn't bother to solve
* that problem.
*/
int64_t curobjs;
int64_t curbytes;
uint64_t accumobjs; uint64_t accumobjs;
uint64_t accumbytes; uint64_t accumbytes;
}; };
struct prof_thr_cnt_s { typedef enum {
prof_thr_uid_t thr_uid; prof_tctx_state_nominal,
prof_tctx_state_dumping,
prof_tctx_state_purgatory /* Dumper must finish destroying. */
} prof_tctx_state_t;
/* Linkage into prof_ctx_t's thr_cnts. */ struct prof_tctx_s {
rb_node(prof_thr_cnt_t) thr_cnt_link; /* Thread data for thread that performed the allocation. */
prof_tdata_t *tdata;
/* /* Profiling counters, protected by tdata->lock. */
* Associated context. If a thread frees an object that it did not
* allocate, it is possible that the context is not present in the
* thread's hash table, in which case it must be able to look up the
* context, insert a new prof_thr_cnt_t into the thread's hash table,
* and link it into the prof_ctx_t's thr_cnts.
*/
prof_ctx_t *ctx;
/*
* Threads use memory barriers to update the counters. Since there is
* only ever one writer, the only challenge is for the reader to get a
* consistent read of the counters.
*
* The writer uses this series of operations:
*
* 1) Increment epoch to an odd number.
* 2) Update counters.
* 3) Increment epoch to an even number.
*
* The reader must assure 1) that the epoch is even while it reads the
* counters, and 2) that the epoch doesn't change between the time it
* starts and finishes reading the counters.
*/
unsigned epoch;
/* Profiling counters. */
prof_cnt_t cnts; prof_cnt_t cnts;
};
typedef rb_tree(prof_thr_cnt_t) prof_thr_cnt_tree_t;
struct prof_ctx_s { /* Associated global context. */
/* Protects nlimbo, cnt_merged, and thr_cnts. */ prof_gctx_t *gctx;
/* Linkage into gctx's tctxs. */
rb_node(prof_tctx_t) tctx_link;
/* Current dump-related state, protected by gctx->lock. */
prof_tctx_state_t state;
/*
* Copy of cnts snapshotted during early dump phase, protected by
* dump_mtx.
*/
prof_cnt_t dump_cnts;
};
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
struct prof_gctx_s {
/* Protects nlimbo, cnt_summed, and tctxs. */
malloc_mutex_t *lock; malloc_mutex_t *lock;
/* /*
* Number of threads that currently cause this ctx to be in a state of * Number of threads that currently cause this gctx to be in a state of
* limbo due to one of: * limbo due to one of:
* - Initializing per thread counters associated with this ctx. * - Initializing this gctx.
* - Preparing to destroy this ctx. * - Initializing per thread counters associated with this gctx.
* - Dumping a heap profile that includes this ctx. * - Preparing to destroy this gctx.
* - Dumping a heap profile that includes this gctx.
* nlimbo must be 1 (single destroyer) in order to safely destroy the * nlimbo must be 1 (single destroyer) in order to safely destroy the
* ctx. * gctx.
*/ */
unsigned nlimbo; unsigned nlimbo;
/* Temporary storage for summation during dump. */
prof_cnt_t cnt_summed;
/* When threads exit, they merge their stats into cnt_merged. */
prof_cnt_t cnt_merged;
/* /*
* Tree of profile counters, one for each thread that has allocated in * Tree of profile counters, one for each thread that has allocated in
* this context. * this context.
*/ */
prof_thr_cnt_tree_t thr_cnts; prof_tctx_tree_t tctxs;
/* Linkage for tree of contexts to be dumped. */ /* Linkage for tree of contexts to be dumped. */
rb_node(prof_ctx_t) dump_link; rb_node(prof_gctx_t) dump_link;
/* Temporary storage for summation during dump. */
prof_cnt_t cnt_summed;
/* Associated backtrace. */ /* Associated backtrace. */
prof_bt_t bt; prof_bt_t bt;
@ -154,21 +142,34 @@ struct prof_ctx_s {
/* Backtrace vector, variable size, referred to by bt. */ /* Backtrace vector, variable size, referred to by bt. */
void *vec[1]; void *vec[1];
}; };
typedef rb_tree(prof_ctx_t) prof_ctx_tree_t; typedef rb_tree(prof_gctx_t) prof_gctx_tree_t;
typedef enum {
prof_tdata_state_attached, /* Active thread attached, data valid. */
prof_tdata_state_detached, /* Defunct thread, data remain valid. */
prof_tdata_state_expired /* Predates reset, omit data from dump. */
} prof_tdata_state_t;
struct prof_tdata_s { struct prof_tdata_s {
malloc_mutex_t *lock;
/* Monotonically increasing unique thread identifier. */
uint64_t thr_uid;
/* Included in heap profile dumps if non-NULL. */
char *thread_name;
prof_tdata_state_t state;
rb_node(prof_tdata_t) tdata_link;
/* /*
* Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread tracks * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks
* backtraces for which it has non-zero allocation/deallocation counters * backtraces for which it has non-zero allocation/deallocation counters
* associated with thread-specific prof_thr_cnt_t objects. Other * associated with thread-specific prof_tctx_t objects. Other threads
* threads may read the prof_thr_cnt_t contents, but no others will ever * may write to prof_tctx_t contents when freeing associated objects.
* write them.
*
* Upon thread exit, the thread must merge all the prof_thr_cnt_t
* counter data into the associated prof_ctx_t objects, and unlink/free
* the prof_thr_cnt_t objects.
*/ */
ckh_t bt2cnt; ckh_t bt2tctx;
/* Sampling state. */ /* Sampling state. */
uint64_t prng_state; uint64_t prng_state;
@ -179,9 +180,27 @@ struct prof_tdata_s {
bool enq_idump; bool enq_idump;
bool enq_gdump; bool enq_gdump;
/*
* Set to true during an early dump phase for tdata's which are
* currently being dumped. New threads' tdata's have this initialized
* to false so that they aren't accidentally included in later dump
* phases.
*/
bool dumping;
/*
* True if profiling is active for this tdata's thread
* (thread.prof.active mallctl).
*/
bool active;
/* Temporary storage for summation during dump. */
prof_cnt_t cnt_summed;
/* Backtrace vector, used for calls to prof_backtrace(). */ /* Backtrace vector, used for calls to prof_backtrace(). */
void *vec[PROF_BT_MAX]; void *vec[PROF_BT_MAX];
}; };
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
#endif /* JEMALLOC_H_STRUCTS */ #endif /* JEMALLOC_H_STRUCTS */
/******************************************************************************/ /******************************************************************************/
@ -217,9 +236,18 @@ extern char opt_prof_prefix[
*/ */
extern uint64_t prof_interval; extern uint64_t prof_interval;
/*
* Initialized as opt_lg_prof_sample, and potentially modified during profiling
* resets.
*/
extern size_t lg_prof_sample;
void prof_malloc_sample_object(const void *ptr, size_t usize,
prof_tctx_t *tctx);
void prof_free_sampled_object(size_t usize, prof_tctx_t *tctx);
void bt_init(prof_bt_t *bt, void **vec); void bt_init(prof_bt_t *bt, void **vec);
void prof_backtrace(prof_bt_t *bt); void prof_backtrace(prof_bt_t *bt);
prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); prof_tctx_t *prof_lookup(prof_bt_t *bt);
#ifdef JEMALLOC_JET #ifdef JEMALLOC_JET
size_t prof_bt_count(void); size_t prof_bt_count(void);
typedef int (prof_dump_open_t)(bool, const char *); typedef int (prof_dump_open_t)(bool, const char *);
@ -229,53 +257,44 @@ void prof_idump(void);
bool prof_mdump(const char *filename); bool prof_mdump(const char *filename);
void prof_gdump(void); void prof_gdump(void);
prof_tdata_t *prof_tdata_init(void); prof_tdata_t *prof_tdata_init(void);
prof_tdata_t *prof_tdata_reinit(prof_tdata_t *tdata);
void prof_reset(size_t lg_sample);
void prof_tdata_cleanup(void *arg); void prof_tdata_cleanup(void *arg);
const char *prof_thread_name_get(void);
bool prof_thread_name_set(const char *thread_name);
bool prof_thread_active_get(void);
bool prof_thread_active_set(bool active);
void prof_boot0(void); void prof_boot0(void);
void prof_boot1(void); void prof_boot1(void);
bool prof_boot2(void); bool prof_boot2(void);
void prof_prefork(void); void prof_prefork(void);
void prof_postfork_parent(void); void prof_postfork_parent(void);
void prof_postfork_child(void); void prof_postfork_child(void);
void prof_sample_threshold_update(prof_tdata_t *prof_tdata); void prof_sample_threshold_update(prof_tdata_t *tdata);
#endif /* JEMALLOC_H_EXTERNS */ #endif /* JEMALLOC_H_EXTERNS */
/******************************************************************************/ /******************************************************************************/
#ifdef JEMALLOC_H_INLINES #ifdef JEMALLOC_H_INLINES
#define PROF_ALLOC_PREP(size, ret) do { \
prof_tdata_t *prof_tdata; \
prof_bt_t bt; \
\
assert(size == s2u(size)); \
\
if (!opt_prof_active || \
prof_sample_accum_update(size, false, &prof_tdata)) { \
ret = (prof_thr_cnt_t *)(uintptr_t)1U; \
} else { \
bt_init(&bt, prof_tdata->vec); \
prof_backtrace(&bt); \
ret = prof_lookup(&bt); \
} \
} while (0)
#ifndef JEMALLOC_ENABLE_INLINE #ifndef JEMALLOC_ENABLE_INLINE
malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *) malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
prof_tdata_t *prof_tdata_get(bool create); prof_tdata_t *prof_tdata_get(bool create);
bool prof_sample_accum_update(size_t size, bool commit, bool prof_sample_accum_update(size_t usize, bool commit,
prof_tdata_t **prof_tdata_out); prof_tdata_t **tdata_out);
prof_ctx_t *prof_ctx_get(const void *ptr); prof_tctx_t *prof_alloc_prep(size_t usize);
void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); prof_tctx_t *prof_tctx_get(const void *ptr);
void prof_malloc_record_object(const void *ptr, size_t usize, void prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
prof_thr_cnt_t *cnt); void prof_malloc_sample_object(const void *ptr, size_t usize,
void prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt); prof_tctx_t *tctx);
void prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx);
size_t old_usize, prof_ctx_t *old_ctx); void prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx,
void prof_free(const void *ptr, size_t size); size_t old_usize, prof_tctx_t *old_tctx);
void prof_free(const void *ptr, size_t usize);
#endif #endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ /* Thread-specific backtrace cache, used to reduce bt2gctx contention. */
malloc_tsd_externs(prof_tdata, prof_tdata_t *) malloc_tsd_externs(prof_tdata, prof_tdata_t *)
malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL, malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
prof_tdata_cleanup) prof_tdata_cleanup)
@ -283,21 +302,27 @@ malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
JEMALLOC_INLINE prof_tdata_t * JEMALLOC_INLINE prof_tdata_t *
prof_tdata_get(bool create) prof_tdata_get(bool create)
{ {
prof_tdata_t *prof_tdata; prof_tdata_t *tdata;
cassert(config_prof); cassert(config_prof);
prof_tdata = *prof_tdata_tsd_get(); tdata = *prof_tdata_tsd_get();
if (create && prof_tdata == NULL) if (create) {
prof_tdata = prof_tdata_init(); if (tdata == NULL)
tdata = prof_tdata_init();
else if (tdata->state == prof_tdata_state_expired)
tdata = prof_tdata_reinit(tdata);
assert(tdata == NULL || tdata->state ==
prof_tdata_state_attached);
}
return (prof_tdata); return (tdata);
} }
JEMALLOC_INLINE prof_ctx_t * JEMALLOC_INLINE prof_tctx_t *
prof_ctx_get(const void *ptr) prof_tctx_get(const void *ptr)
{ {
prof_ctx_t *ret; prof_tctx_t *ret;
arena_chunk_t *chunk; arena_chunk_t *chunk;
cassert(config_prof); cassert(config_prof);
@ -306,15 +331,15 @@ prof_ctx_get(const void *ptr)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) { if (chunk != ptr) {
/* Region. */ /* Region. */
ret = arena_prof_ctx_get(ptr); ret = arena_prof_tctx_get(ptr);
} else } else
ret = huge_prof_ctx_get(ptr); ret = huge_prof_tctx_get(ptr);
return (ret); return (ret);
} }
JEMALLOC_INLINE void JEMALLOC_INLINE void
prof_ctx_set(const void *ptr, prof_ctx_t *ctx) prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
{ {
arena_chunk_t *chunk; arena_chunk_t *chunk;
@ -324,66 +349,62 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) { if (chunk != ptr) {
/* Region. */ /* Region. */
arena_prof_ctx_set(ptr, ctx); arena_prof_tctx_set(ptr, tctx);
} else } else
huge_prof_ctx_set(ptr, ctx); huge_prof_tctx_set(ptr, tctx);
} }
JEMALLOC_INLINE bool JEMALLOC_INLINE bool
prof_sample_accum_update(size_t size, bool commit, prof_sample_accum_update(size_t usize, bool commit, prof_tdata_t **tdata_out)
prof_tdata_t **prof_tdata_out)
{ {
prof_tdata_t *prof_tdata; prof_tdata_t *tdata;
cassert(config_prof); cassert(config_prof);
prof_tdata = prof_tdata_get(true); tdata = prof_tdata_get(true);
if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
prof_tdata = NULL; tdata = NULL;
if (prof_tdata_out != NULL) if (tdata_out != NULL)
*prof_tdata_out = prof_tdata; *tdata_out = tdata;
if (prof_tdata == NULL) if (tdata == NULL)
return (true); return (true);
if (prof_tdata->bytes_until_sample >= size) { if (tdata->bytes_until_sample >= usize) {
if (commit) if (commit)
prof_tdata->bytes_until_sample -= size; tdata->bytes_until_sample -= usize;
return (true); return (true);
} else { } else {
/* Compute new sample threshold. */ /* Compute new sample threshold. */
if (commit) if (commit)
prof_sample_threshold_update(prof_tdata); prof_sample_threshold_update(tdata);
return (false); return (tdata->active == false);
} }
} }
JEMALLOC_INLINE void JEMALLOC_INLINE prof_tctx_t *
prof_malloc_record_object(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) { prof_alloc_prep(size_t usize)
prof_ctx_set(ptr, cnt->ctx); {
prof_tctx_t *ret;
prof_tdata_t *tdata;
prof_bt_t bt;
cnt->epoch++; assert(usize == s2u(usize));
/*********/
mb_write(); if (!opt_prof_active || prof_sample_accum_update(usize, false, &tdata))
/*********/ ret = (prof_tctx_t *)(uintptr_t)1U;
cnt->cnts.curobjs++; else {
cnt->cnts.curbytes += usize; bt_init(&bt, tdata->vec);
if (opt_prof_accum) { prof_backtrace(&bt);
cnt->cnts.accumobjs++; ret = prof_lookup(&bt);
cnt->cnts.accumbytes += usize;
} }
/*********/
mb_write(); return (ret);
/*********/
cnt->epoch++;
/*********/
mb_write();
/*********/
} }
JEMALLOC_INLINE void JEMALLOC_INLINE void
prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt) prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx)
{ {
cassert(config_prof); cassert(config_prof);
@ -392,131 +413,60 @@ prof_malloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt)
if (prof_sample_accum_update(usize, true, NULL)) { if (prof_sample_accum_update(usize, true, NULL)) {
/* /*
* Don't sample. For malloc()-like allocation, it is * Don't sample. For malloc()-like allocation, it is always
* always possible to tell in advance how large an * possible to tell in advance how large an object's usable size
* object's usable size will be, so there should never * will be, so there should never be a difference between the
* be a difference between the usize passed to * usize passed to PROF_ALLOC_PREP() and prof_malloc().
* PROF_ALLOC_PREP() and prof_malloc().
*/ */
assert((uintptr_t)cnt == (uintptr_t)1U); assert((uintptr_t)tctx == (uintptr_t)1U);
} }
if ((uintptr_t)cnt > (uintptr_t)1U) if ((uintptr_t)tctx > (uintptr_t)1U)
prof_malloc_record_object(ptr, usize, cnt); prof_malloc_sample_object(ptr, usize, tctx);
else else
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
} }
JEMALLOC_INLINE void JEMALLOC_INLINE void
prof_realloc(const void *ptr, size_t usize, prof_thr_cnt_t *cnt, prof_realloc(const void *ptr, size_t usize, prof_tctx_t *tctx, size_t old_usize,
size_t old_usize, prof_ctx_t *old_ctx) prof_tctx_t *old_tctx)
{ {
prof_thr_cnt_t *told_cnt;
cassert(config_prof); cassert(config_prof);
assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
if (ptr != NULL) { if (ptr != NULL) {
assert(usize == isalloc(ptr, true)); assert(usize == isalloc(ptr, true));
if (prof_sample_accum_update(usize, true, NULL)) { if (prof_sample_accum_update(usize, true, NULL)) {
/* /*
* Don't sample. The usize passed to * Don't sample. The usize passed to PROF_ALLOC_PREP()
* PROF_ALLOC_PREP() was larger than what * was larger than what actually got allocated, so a
* actually got allocated, so a backtrace was * backtrace was captured for this allocation, even
* captured for this allocation, even though * though its actual usize was insufficient to cross the
* its actual usize was insufficient to cross * sample threshold.
* the sample threshold.
*/ */
cnt = (prof_thr_cnt_t *)(uintptr_t)1U; tctx = (prof_tctx_t *)(uintptr_t)1U;
} }
} }
if ((uintptr_t)old_ctx > (uintptr_t)1U) { if ((uintptr_t)old_tctx > (uintptr_t)1U)
told_cnt = prof_lookup(&old_ctx->bt); prof_free_sampled_object(old_usize, old_tctx);
if (told_cnt == NULL) { if ((uintptr_t)tctx > (uintptr_t)1U)
/* prof_malloc_sample_object(ptr, usize, tctx);
* It's too late to propagate OOM for this realloc(), else
* so operate directly on old_cnt->ctx->cnt_merged. prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U);
*/
malloc_mutex_lock(old_ctx->lock);
old_ctx->cnt_merged.curobjs--;
old_ctx->cnt_merged.curbytes -= old_usize;
malloc_mutex_unlock(old_ctx->lock);
told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
}
} else
told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
if ((uintptr_t)told_cnt > (uintptr_t)1U)
told_cnt->epoch++;
if ((uintptr_t)cnt > (uintptr_t)1U) {
prof_ctx_set(ptr, cnt->ctx);
cnt->epoch++;
} else if (ptr != NULL)
prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
/*********/
mb_write();
/*********/
if ((uintptr_t)told_cnt > (uintptr_t)1U) {
told_cnt->cnts.curobjs--;
told_cnt->cnts.curbytes -= old_usize;
}
if ((uintptr_t)cnt > (uintptr_t)1U) {
cnt->cnts.curobjs++;
cnt->cnts.curbytes += usize;
if (opt_prof_accum) {
cnt->cnts.accumobjs++;
cnt->cnts.accumbytes += usize;
}
}
/*********/
mb_write();
/*********/
if ((uintptr_t)told_cnt > (uintptr_t)1U)
told_cnt->epoch++;
if ((uintptr_t)cnt > (uintptr_t)1U)
cnt->epoch++;
/*********/
mb_write(); /* Not strictly necessary. */
} }
JEMALLOC_INLINE void JEMALLOC_INLINE void
prof_free(const void *ptr, size_t size) prof_free(const void *ptr, size_t usize)
{ {
prof_ctx_t *ctx = prof_ctx_get(ptr); prof_tctx_t *tctx = prof_tctx_get(ptr);
cassert(config_prof); cassert(config_prof);
assert(usize == isalloc(ptr, true));
if ((uintptr_t)ctx > (uintptr_t)1) { if ((uintptr_t)tctx > (uintptr_t)1U)
prof_thr_cnt_t *tcnt; prof_free_sampled_object(usize, tctx);
assert(size == isalloc(ptr, true));
tcnt = prof_lookup(&ctx->bt);
if (tcnt != NULL) {
tcnt->epoch++;
/*********/
mb_write();
/*********/
tcnt->cnts.curobjs--;
tcnt->cnts.curbytes -= size;
/*********/
mb_write();
/*********/
tcnt->epoch++;
/*********/
mb_write();
/*********/
} else {
/*
* OOM during free() cannot be propagated, so operate
* directly on cnt->ctx->cnt_merged.
*/
malloc_mutex_lock(ctx->lock);
ctx->cnt_merged.curobjs--;
ctx->cnt_merged.curbytes -= size;
malloc_mutex_unlock(ctx->lock);
}
}
} }
#endif #endif


@ -68,6 +68,8 @@ CTL_PROTO(version)
CTL_PROTO(epoch) CTL_PROTO(epoch)
CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_enabled)
CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_tcache_flush)
CTL_PROTO(thread_prof_name)
CTL_PROTO(thread_prof_active)
CTL_PROTO(thread_arena) CTL_PROTO(thread_arena)
CTL_PROTO(thread_allocated) CTL_PROTO(thread_allocated)
CTL_PROTO(thread_allocatedp) CTL_PROTO(thread_allocatedp)
@ -132,7 +134,9 @@ CTL_PROTO(arenas_nlruns)
CTL_PROTO(arenas_extend) CTL_PROTO(arenas_extend)
CTL_PROTO(prof_active) CTL_PROTO(prof_active)
CTL_PROTO(prof_dump) CTL_PROTO(prof_dump)
CTL_PROTO(prof_reset)
CTL_PROTO(prof_interval) CTL_PROTO(prof_interval)
CTL_PROTO(lg_prof_sample)
CTL_PROTO(stats_chunks_current) CTL_PROTO(stats_chunks_current)
CTL_PROTO(stats_chunks_total) CTL_PROTO(stats_chunks_total)
CTL_PROTO(stats_chunks_high) CTL_PROTO(stats_chunks_high)
@ -196,18 +200,24 @@ CTL_PROTO(stats_mapped)
*/ */
#define INDEX(i) {false}, i##_index #define INDEX(i) {false}, i##_index
static const ctl_named_node_t tcache_node[] = { static const ctl_named_node_t thread_tcache_node[] = {
{NAME("enabled"), CTL(thread_tcache_enabled)}, {NAME("enabled"), CTL(thread_tcache_enabled)},
{NAME("flush"), CTL(thread_tcache_flush)} {NAME("flush"), CTL(thread_tcache_flush)}
}; };
static const ctl_named_node_t thread_prof_node[] = {
{NAME("name"), CTL(thread_prof_name)},
{NAME("active"), CTL(thread_prof_active)}
};
static const ctl_named_node_t thread_node[] = { static const ctl_named_node_t thread_node[] = {
{NAME("arena"), CTL(thread_arena)}, {NAME("arena"), CTL(thread_arena)},
{NAME("allocated"), CTL(thread_allocated)}, {NAME("allocated"), CTL(thread_allocated)},
{NAME("allocatedp"), CTL(thread_allocatedp)}, {NAME("allocatedp"), CTL(thread_allocatedp)},
{NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocated"), CTL(thread_deallocated)},
{NAME("deallocatedp"), CTL(thread_deallocatedp)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)},
{NAME("tcache"), CHILD(named, tcache)} {NAME("tcache"), CHILD(named, thread_tcache)},
{NAME("prof"), CHILD(named, thread_prof)}
}; };
static const ctl_named_node_t config_node[] = { static const ctl_named_node_t config_node[] = {
@ -311,7 +321,9 @@ static const ctl_named_node_t arenas_node[] = {
static const ctl_named_node_t prof_node[] = { static const ctl_named_node_t prof_node[] = {
{NAME("active"), CTL(prof_active)}, {NAME("active"), CTL(prof_active)},
{NAME("dump"), CTL(prof_dump)}, {NAME("dump"), CTL(prof_dump)},
{NAME("interval"), CTL(prof_interval)} {NAME("reset"), CTL(prof_reset)},
{NAME("interval"), CTL(prof_interval)},
{NAME("lg_sample"), CTL(lg_prof_sample)}
}; };
static const ctl_named_node_t stats_chunks_node[] = { static const ctl_named_node_t stats_chunks_node[] = {
@ -1281,6 +1293,62 @@ label_return:
return (ret); return (ret);
} }
static int
thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp,
size_t *oldlenp, void *newp, size_t newlen)
{
int ret;
const char *oldname;
if (config_prof == false)
return (ENOENT);
oldname = prof_thread_name_get();
if (newp != NULL) {
if (newlen != sizeof(const char *)) {
ret = EINVAL;
goto label_return;
}
if (prof_thread_name_set(*(const char **)newp)) {
ret = EAGAIN;
goto label_return;
}
}
READ(oldname, const char *);
ret = 0;
label_return:
return (ret);
}
static int
thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp,
size_t *oldlenp, void *newp, size_t newlen)
{
int ret;
bool oldval;
if (config_prof == false)
return (ENOENT);
oldval = prof_thread_active_get();
if (newp != NULL) {
if (newlen != sizeof(bool)) {
ret = EINVAL;
goto label_return;
}
if (prof_thread_active_set(*(bool *)newp)) {
ret = EAGAIN;
goto label_return;
}
}
READ(oldval, bool);
ret = 0;
label_return:
return (ret);
}
/******************************************************************************/ /******************************************************************************/
/* ctl_mutex must be held during execution of this function. */ /* ctl_mutex must be held during execution of this function. */
@ -1601,7 +1669,30 @@ label_return:
return (ret); return (ret);
} }
static int
prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
void *newp, size_t newlen)
{
int ret;
size_t lg_sample = lg_prof_sample;
if (config_prof == false)
return (ENOENT);
WRITEONLY();
WRITE(lg_sample, size_t);
if (lg_sample >= (sizeof(uint64_t) << 3))
lg_sample = (sizeof(uint64_t) << 3) - 1;
prof_reset(lg_sample);
ret = 0;
label_return:
return (ret);
}
CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t)
CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t)
/******************************************************************************/ /******************************************************************************/


@ -197,10 +197,10 @@ huge_salloc(const void *ptr)
return (ret); return (ret);
} }
prof_ctx_t * prof_tctx_t *
huge_prof_ctx_get(const void *ptr) huge_prof_tctx_get(const void *ptr)
{ {
prof_ctx_t *ret; prof_tctx_t *ret;
extent_node_t *node, key; extent_node_t *node, key;
malloc_mutex_lock(&huge_mtx); malloc_mutex_lock(&huge_mtx);
@ -210,7 +210,7 @@ huge_prof_ctx_get(const void *ptr)
node = extent_tree_ad_search(&huge, &key); node = extent_tree_ad_search(&huge, &key);
assert(node != NULL); assert(node != NULL);
ret = node->prof_ctx; ret = node->prof_tctx;
malloc_mutex_unlock(&huge_mtx); malloc_mutex_unlock(&huge_mtx);
@ -218,7 +218,7 @@ huge_prof_ctx_get(const void *ptr)
} }
void void
huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx)
{ {
extent_node_t *node, key; extent_node_t *node, key;
@ -229,7 +229,7 @@ huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
node = extent_tree_ad_search(&huge, &key); node = extent_tree_ad_search(&huge, &key);
assert(node != NULL); assert(node != NULL);
node->prof_ctx = ctx; node->prof_tctx = tctx;
malloc_mutex_unlock(&huge_mtx); malloc_mutex_unlock(&huge_mtx);
} }


@ -636,9 +636,9 @@ malloc_conf_init(void)
"prof_prefix", "jeprof") "prof_prefix", "jeprof")
CONF_HANDLE_BOOL(opt_prof_active, "prof_active", CONF_HANDLE_BOOL(opt_prof_active, "prof_active",
true) true)
CONF_HANDLE_SSIZE_T(opt_lg_prof_sample, CONF_HANDLE_SIZE_T(opt_lg_prof_sample,
"lg_prof_sample", 0, "lg_prof_sample", 0,
(sizeof(uint64_t) << 3) - 1) (sizeof(uint64_t) << 3) - 1, true)
CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum", CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum",
true) true)
CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, CONF_HANDLE_SSIZE_T(opt_lg_prof_interval,
@ -863,11 +863,11 @@ malloc_init_hard(void)
*/ */
static void * static void *
imalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) imalloc_prof_sample(size_t usize, prof_tctx_t *tctx)
{ {
void *p; void *p;
if (cnt == NULL) if (tctx == NULL)
return (NULL); return (NULL);
if (usize <= SMALL_MAXCLASS) { if (usize <= SMALL_MAXCLASS) {
p = imalloc(SMALL_MAXCLASS+1); p = imalloc(SMALL_MAXCLASS+1);
@ -884,16 +884,16 @@ JEMALLOC_ALWAYS_INLINE_C void *
imalloc_prof(size_t usize) imalloc_prof(size_t usize)
{ {
void *p; void *p;
prof_thr_cnt_t *cnt; prof_tctx_t *tctx;
PROF_ALLOC_PREP(usize, cnt); tctx = prof_alloc_prep(usize);
if ((uintptr_t)cnt != (uintptr_t)1U) if ((uintptr_t)tctx != (uintptr_t)1U)
p = imalloc_prof_sample(usize, cnt); p = imalloc_prof_sample(usize, tctx);
else else
p = imalloc(usize); p = imalloc(usize);
if (p == NULL) if (p == NULL)
return (NULL); return (NULL);
prof_malloc(p, usize, cnt); prof_malloc(p, usize, tctx);
return (p); return (p);
} }
@ -943,11 +943,11 @@ je_malloc(size_t size)
} }
static void * static void *
imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) imemalign_prof_sample(size_t alignment, size_t usize, prof_tctx_t *tctx)
{ {
void *p; void *p;
if (cnt == NULL) if (tctx == NULL)
return (NULL); return (NULL);
if (usize <= SMALL_MAXCLASS) { if (usize <= SMALL_MAXCLASS) {
assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0); assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0);
@ -963,17 +963,17 @@ imemalign_prof_sample(size_t alignment, size_t usize, prof_thr_cnt_t *cnt)
} }
JEMALLOC_ALWAYS_INLINE_C void * JEMALLOC_ALWAYS_INLINE_C void *
imemalign_prof(size_t alignment, size_t usize, prof_thr_cnt_t *cnt) imemalign_prof(size_t alignment, size_t usize, prof_tctx_t *tctx)
{ {
void *p; void *p;
if ((uintptr_t)cnt != (uintptr_t)1U) if ((uintptr_t)tctx != (uintptr_t)1U)
p = imemalign_prof_sample(alignment, usize, cnt); p = imemalign_prof_sample(alignment, usize, tctx);
else else
p = ipalloc(usize, alignment, false); p = ipalloc(usize, alignment, false);
if (p == NULL) if (p == NULL)
return (NULL); return (NULL);
prof_malloc(p, usize, cnt); prof_malloc(p, usize, tctx);
return (p); return (p);
} }
@ -1015,10 +1015,10 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment)
} }
if (config_prof && opt_prof) { if (config_prof && opt_prof) {
prof_thr_cnt_t *cnt; prof_tctx_t *tctx;
PROF_ALLOC_PREP(usize, cnt); tctx = prof_alloc_prep(usize);
result = imemalign_prof(alignment, usize, cnt); result = imemalign_prof(alignment, usize, tctx);
} else } else
result = ipalloc(usize, alignment, false); result = ipalloc(usize, alignment, false);
if (result == NULL) if (result == NULL)
@ -1070,11 +1070,11 @@ je_aligned_alloc(size_t alignment, size_t size)
} }
static void * static void *
icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt) icalloc_prof_sample(size_t usize, prof_tctx_t *tctx)
{ {
void *p; void *p;
if (cnt == NULL) if (tctx == NULL)
return (NULL); return (NULL);
if (usize <= SMALL_MAXCLASS) { if (usize <= SMALL_MAXCLASS) {
p = icalloc(SMALL_MAXCLASS+1); p = icalloc(SMALL_MAXCLASS+1);
@ -1088,17 +1088,17 @@ icalloc_prof_sample(size_t usize, prof_thr_cnt_t *cnt)
} }
JEMALLOC_ALWAYS_INLINE_C void * JEMALLOC_ALWAYS_INLINE_C void *
icalloc_prof(size_t usize, prof_thr_cnt_t *cnt) icalloc_prof(size_t usize, prof_tctx_t *tctx)
{ {
void *p; void *p;
if ((uintptr_t)cnt != (uintptr_t)1U) if ((uintptr_t)tctx != (uintptr_t)1U)
p = icalloc_prof_sample(usize, cnt); p = icalloc_prof_sample(usize, tctx);
else else
p = icalloc(usize); p = icalloc(usize);
if (p == NULL) if (p == NULL)
return (NULL); return (NULL);
prof_malloc(p, usize, cnt); prof_malloc(p, usize, tctx);
return (p); return (p);
} }
@ -1137,11 +1137,11 @@ je_calloc(size_t num, size_t size)
} }
if (config_prof && opt_prof) { if (config_prof && opt_prof) {
prof_thr_cnt_t *cnt; prof_tctx_t *tctx;
usize = s2u(num_size); usize = s2u(num_size);
PROF_ALLOC_PREP(usize, cnt); tctx = prof_alloc_prep(usize);
ret = icalloc_prof(usize, cnt); ret = icalloc_prof(usize, tctx);
} else { } else {
if (config_stats || (config_valgrind && in_valgrind)) if (config_stats || (config_valgrind && in_valgrind))
usize = s2u(num_size); usize = s2u(num_size);
@ -1167,11 +1167,11 @@ label_return:
} }
static void * static void *
irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt) irealloc_prof_sample(void *oldptr, size_t usize, prof_tctx_t *tctx)
{ {
void *p; void *p;
if (cnt == NULL) if (tctx == NULL)
return (NULL); return (NULL);
if (usize <= SMALL_MAXCLASS) { if (usize <= SMALL_MAXCLASS) {
p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false); p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false);
@ -1185,19 +1185,19 @@ irealloc_prof_sample(void *oldptr, size_t usize, prof_thr_cnt_t *cnt)
} }
JEMALLOC_ALWAYS_INLINE_C void * JEMALLOC_ALWAYS_INLINE_C void *
irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_thr_cnt_t *cnt) irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_tctx_t *tctx)
{ {
void *p; void *p;
prof_ctx_t *old_ctx; prof_tctx_t *old_tctx;
old_ctx = prof_ctx_get(oldptr); old_tctx = prof_tctx_get(oldptr);
if ((uintptr_t)cnt != (uintptr_t)1U) if ((uintptr_t)tctx != (uintptr_t)1U)
p = irealloc_prof_sample(oldptr, usize, cnt); p = irealloc_prof_sample(oldptr, usize, tctx);
else else
p = iralloc(oldptr, usize, 0, 0, false); p = iralloc(oldptr, usize, 0, 0, false);
if (p == NULL) if (p == NULL)
return (NULL); return (NULL);
prof_realloc(p, usize, cnt, old_usize, old_ctx); prof_realloc(p, usize, tctx, old_usize, old_tctx);
return (p); return (p);
} }
@ -1253,11 +1253,11 @@ je_realloc(void *ptr, size_t size)
old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize);
if (config_prof && opt_prof) { if (config_prof && opt_prof) {
prof_thr_cnt_t *cnt; prof_tctx_t *tctx;
usize = s2u(size); usize = s2u(size);
PROF_ALLOC_PREP(usize, cnt); tctx = prof_alloc_prep(usize);
ret = irealloc_prof(ptr, old_usize, usize, cnt); ret = irealloc_prof(ptr, old_usize, usize, tctx);
} else { } else {
if (config_stats || (config_valgrind && in_valgrind)) if (config_stats || (config_valgrind && in_valgrind))
usize = s2u(size); usize = s2u(size);
@ -1379,11 +1379,11 @@ imallocx(size_t usize, size_t alignment, bool zero, bool try_tcache,
static void * static void *
imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache, imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache,
arena_t *arena, prof_thr_cnt_t *cnt) arena_t *arena, prof_tctx_t *tctx)
{ {
void *p; void *p;
if (cnt == NULL) if (tctx == NULL)
return (NULL); return (NULL);
if (usize <= SMALL_MAXCLASS) { if (usize <= SMALL_MAXCLASS) {
size_t usize_promoted = (alignment == 0) ? size_t usize_promoted = (alignment == 0) ?
@ -1402,18 +1402,18 @@ imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache,
JEMALLOC_ALWAYS_INLINE_C void * JEMALLOC_ALWAYS_INLINE_C void *
imallocx_prof(size_t usize, size_t alignment, bool zero, bool try_tcache, imallocx_prof(size_t usize, size_t alignment, bool zero, bool try_tcache,
arena_t *arena, prof_thr_cnt_t *cnt) arena_t *arena, prof_tctx_t *tctx)
{ {
void *p; void *p;
if ((uintptr_t)cnt != (uintptr_t)1U) { if ((uintptr_t)tctx != (uintptr_t)1U) {
p = imallocx_prof_sample(usize, alignment, zero, try_tcache, p = imallocx_prof_sample(usize, alignment, zero, try_tcache,
arena, cnt); arena, tctx);
} else } else
p = imallocx(usize, alignment, zero, try_tcache, arena); p = imallocx(usize, alignment, zero, try_tcache, arena);
if (p == NULL) if (p == NULL)
return (NULL); return (NULL);
prof_malloc(p, usize, cnt); prof_malloc(p, usize, tctx);
return (p); return (p);
} }
@ -1447,11 +1447,11 @@ je_mallocx(size_t size, int flags)
assert(usize != 0); assert(usize != 0);
if (config_prof && opt_prof) { if (config_prof && opt_prof) {
prof_thr_cnt_t *cnt; prof_tctx_t *tctx;
PROF_ALLOC_PREP(usize, cnt); tctx = prof_alloc_prep(usize);
p = imallocx_prof(usize, alignment, zero, try_tcache, arena, p = imallocx_prof(usize, alignment, zero, try_tcache, arena,
cnt); tctx);
} else } else
p = imallocx(usize, alignment, zero, try_tcache, arena); p = imallocx(usize, alignment, zero, try_tcache, arena);
if (p == NULL) if (p == NULL)
@ -1476,11 +1476,11 @@ label_oom:
static void * static void *
irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize, irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize,
bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, arena_t *arena,
prof_thr_cnt_t *cnt) prof_tctx_t *tctx)
{ {
void *p; void *p;
if (cnt == NULL) if (tctx == NULL)
return (NULL); return (NULL);
if (usize <= SMALL_MAXCLASS) { if (usize <= SMALL_MAXCLASS) {
p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
@ -1500,15 +1500,15 @@ irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize,
JEMALLOC_ALWAYS_INLINE_C void * JEMALLOC_ALWAYS_INLINE_C void *
irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment, irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment,
size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc, size_t *usize, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
arena_t *arena, prof_thr_cnt_t *cnt) arena_t *arena, prof_tctx_t *tctx)
{ {
void *p; void *p;
prof_ctx_t *old_ctx; prof_tctx_t *old_tctx;
old_ctx = prof_ctx_get(oldptr); old_tctx = prof_tctx_get(oldptr);
if ((uintptr_t)cnt != (uintptr_t)1U) if ((uintptr_t)tctx != (uintptr_t)1U)
p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero, p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero,
try_tcache_alloc, try_tcache_dalloc, arena, cnt); try_tcache_alloc, try_tcache_dalloc, arena, tctx);
else { else {
p = iralloct(oldptr, size, 0, alignment, zero, p = iralloct(oldptr, size, 0, alignment, zero,
try_tcache_alloc, try_tcache_dalloc, arena); try_tcache_alloc, try_tcache_dalloc, arena);
@ -1527,7 +1527,7 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment,
*/ */
*usize = isalloc(p, config_prof); *usize = isalloc(p, config_prof);
} }
prof_realloc(p, *usize, cnt, old_usize, old_ctx); prof_realloc(p, *usize, tctx, old_usize, old_tctx);
return (p); return (p);
} }
@ -1570,13 +1570,13 @@ je_rallocx(void *ptr, size_t size, int flags)
old_rzsize = u2rz(old_usize); old_rzsize = u2rz(old_usize);
if (config_prof && opt_prof) { if (config_prof && opt_prof) {
prof_thr_cnt_t *cnt; prof_tctx_t *tctx;
usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
assert(usize != 0); assert(usize != 0);
PROF_ALLOC_PREP(usize, cnt); tctx = prof_alloc_prep(usize);
p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero, p = irallocx_prof(ptr, old_usize, size, alignment, &usize, zero,
try_tcache_alloc, try_tcache_dalloc, arena, cnt); try_tcache_alloc, try_tcache_dalloc, arena, tctx);
if (p == NULL) if (p == NULL)
goto label_oom; goto label_oom;
} else { } else {
@ -1623,11 +1623,11 @@ ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra,
static size_t static size_t
ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra,
size_t alignment, size_t max_usize, bool zero, arena_t *arena, size_t alignment, size_t max_usize, bool zero, arena_t *arena,
prof_thr_cnt_t *cnt) prof_tctx_t *tctx)
{ {
size_t usize; size_t usize;
if (cnt == NULL) if (tctx == NULL)
return (old_usize); return (old_usize);
/* Use minimum usize to determine whether promotion may happen. */ /* Use minimum usize to determine whether promotion may happen. */
if (((alignment == 0) ? s2u(size) : sa2u(size, alignment)) <= if (((alignment == 0) ? s2u(size) : sa2u(size, alignment)) <=
@ -1650,22 +1650,22 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra,
JEMALLOC_ALWAYS_INLINE_C size_t JEMALLOC_ALWAYS_INLINE_C size_t
ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra, ixallocx_prof(void *ptr, size_t old_usize, size_t size, size_t extra,
size_t alignment, size_t max_usize, bool zero, arena_t *arena, size_t alignment, size_t max_usize, bool zero, arena_t *arena,
prof_thr_cnt_t *cnt) prof_tctx_t *tctx)
{ {
size_t usize; size_t usize;
prof_ctx_t *old_ctx; prof_tctx_t *old_tctx;
old_ctx = prof_ctx_get(ptr); old_tctx = prof_tctx_get(ptr);
if ((uintptr_t)cnt != (uintptr_t)1U) { if ((uintptr_t)tctx != (uintptr_t)1U) {
usize = ixallocx_prof_sample(ptr, old_usize, size, extra, usize = ixallocx_prof_sample(ptr, old_usize, size, extra,
alignment, zero, max_usize, arena, cnt); alignment, zero, max_usize, arena, tctx);
} else { } else {
usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, usize = ixallocx_helper(ptr, old_usize, size, extra, alignment,
zero, arena); zero, arena);
} }
if (usize == old_usize) if (usize == old_usize)
return (usize); return (usize);
prof_realloc(ptr, usize, cnt, old_usize, old_ctx); prof_realloc(ptr, usize, tctx, old_usize, old_tctx);
return (usize); return (usize);
} }
@ -1697,19 +1697,19 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
old_rzsize = u2rz(old_usize); old_rzsize = u2rz(old_usize);
if (config_prof && opt_prof) { if (config_prof && opt_prof) {
prof_thr_cnt_t *cnt; prof_tctx_t *tctx;
/* /*
* usize isn't knowable before ixalloc() returns when extra is * usize isn't knowable before ixalloc() returns when extra is
* non-zero. Therefore, compute its maximum possible value and * non-zero. Therefore, compute its maximum possible value and
* use that in PROF_ALLOC_PREP() to decide whether to capture a * use that in prof_alloc_prep() to decide whether to capture a
* backtrace. prof_realloc() will use the actual usize to * backtrace. prof_realloc() will use the actual usize to
* decide whether to sample. * decide whether to sample.
*/ */
size_t max_usize = (alignment == 0) ? s2u(size+extra) : size_t max_usize = (alignment == 0) ? s2u(size+extra) :
sa2u(size+extra, alignment); sa2u(size+extra, alignment);
PROF_ALLOC_PREP(max_usize, cnt); tctx = prof_alloc_prep(max_usize);
usize = ixallocx_prof(ptr, old_usize, size, extra, alignment, usize = ixallocx_prof(ptr, old_usize, size, extra, alignment,
max_usize, zero, arena, cnt); max_usize, zero, arena, tctx);
} else { } else {
usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, usize = ixallocx_helper(ptr, old_usize, size, extra, alignment,
zero, arena); zero, arena);

src/prof.c

File diff suppressed because it is too large.


@ -441,7 +441,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
} }
if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 && if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 &&
bv) { bv) {
CTL_GET("opt.lg_prof_sample", &sv, size_t); CTL_GET("prof.lg_sample", &sv, size_t);
malloc_cprintf(write_cb, cbopaque, malloc_cprintf(write_cb, cbopaque,
"Average profile sample interval: %"PRIu64 "Average profile sample interval: %"PRIu64
" (2^%zu)\n", (((uint64_t)1U) << sv), sv); " (2^%zu)\n", (((uint64_t)1U) << sv), sv);