Remove ephemeral mutexes.

Remove ephemeral mutexes from the prof machinery, and remove
malloc_mutex_destroy().  This simplifies mutex management on systems
that call malloc()/free() inside pthread_mutex_{init,destroy}().

Add atomic_*_u() for operations on unsigned values.

Fix prof_printf() to call malloc_vsnprintf() rather than
malloc_snprintf().
Author: Jason Evans
Date:   2012-03-23 18:05:51 -07:00
commit 6da5418ded
parent b80581d309

6 changed files with 89 additions and 43 deletions
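
The gist of the change, distilled outside the diff: rather than embedding a mutex in each prof_ctx_t and creating/destroying it along with the ctx, a fixed table of leaf locks is initialized once at boot and every new ctx borrows one by atomically advancing a counter.  The following is a minimal standalone sketch of that pattern, using plain pthreads and C11 atomics instead of jemalloc's internal types; the names NCTX_LOCKS, ctx_locks_boot(), and ctx_mutex_choose() are illustrative stand-ins for the PROF_NCTX_LOCKS / prof_boot2() / prof_ctx_mutex_choose() machinery added below.

	/* Illustrative sketch only -- not jemalloc source. */
	#include <pthread.h>
	#include <stdatomic.h>

	#define NCTX_LOCKS 1024			/* over-provisioned, allocated once */

	static pthread_mutex_t ctx_locks[NCTX_LOCKS];
	static atomic_uint cum_ctxs;		/* counts ctx's ever created */

	/* Called once at startup; the locks are never destroyed afterwards. */
	static void
	ctx_locks_boot(void)
	{
		for (unsigned i = 0; i < NCTX_LOCKS; i++)
			pthread_mutex_init(&ctx_locks[i], NULL);
	}

	/* Each new ctx stores a pointer to one of the shared leaf locks. */
	static pthread_mutex_t *
	ctx_mutex_choose(void)
	{
		unsigned nctxs = atomic_fetch_add(&cum_ctxs, 1) + 1;

		return (&ctx_locks[(nctxs - 1) % NCTX_LOCKS]);
	}

Because ctx creation no longer initializes a mutex and ctx destruction no longer destroys one, no mutex is ever created or destroyed on a malloc()/free() path after boot.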

include/jemalloc/internal/atomic.h

@@ -12,6 +12,7 @@
 #define atomic_read_uint64(p) atomic_add_uint64(p, 0)
 #define atomic_read_uint32(p) atomic_add_uint32(p, 0)
 #define atomic_read_z(p) atomic_add_z(p, 0)
+#define atomic_read_u(p) atomic_add_u(p, 0)
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
@@ -24,6 +25,8 @@ uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
 uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
 size_t atomic_add_z(size_t *p, size_t x);
 size_t atomic_sub_z(size_t *p, size_t x);
+unsigned atomic_add_u(unsigned *p, unsigned x);
+unsigned atomic_sub_u(unsigned *p, unsigned x);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
@@ -192,6 +195,33 @@ atomic_sub_z(size_t *p, size_t x)
 	    (uint32_t)-((int32_t)x)));
 #endif
 }
+
+/******************************************************************************/
+/* unsigned operations. */
+JEMALLOC_INLINE unsigned
+atomic_add_u(unsigned *p, unsigned x)
+{
+
+#if (LG_SIZEOF_INT == 3)
+	return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
+#elif (LG_SIZEOF_INT == 2)
+	return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
+#endif
+}
+
+JEMALLOC_INLINE unsigned
+atomic_sub_u(unsigned *p, unsigned x)
+{
+
+#if (LG_SIZEOF_INT == 3)
+	return ((unsigned)atomic_add_uint64((uint64_t *)p,
+	    (uint64_t)-((int64_t)x)));
+#elif (LG_SIZEOF_INT == 2)
+	return ((unsigned)atomic_add_uint32((uint32_t *)p,
+	    (uint32_t)-((int32_t)x)));
+#endif
+}
+/******************************************************************************/
 #endif
 #endif /* JEMALLOC_H_INLINES */
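
For context, the new unsigned wrappers follow the same convention as the existing uint32/uint64/size_t ones: they return the value after the update, and atomic_read_u() is just an add of zero.  A hedged usage sketch (assumes jemalloc's internal atomic.h is in scope; the counter and function names here are hypothetical):

	/* Hypothetical illustration; depends on jemalloc's internal atomic.h. */
	static unsigned refs;

	static unsigned
	bump_and_report(void)
	{
		unsigned after_add = atomic_add_u(&refs, 1);	/* returns the new value */
		unsigned after_sub = atomic_sub_u(&refs, 1);	/* likewise for subtraction */
		(void)after_add; (void)after_sub;

		return (atomic_read_u(&refs));	/* expands to atomic_add_u(&refs, 0) */
	}

The return-new-value convention is what lets prof_ctx_mutex_choose() (later in this commit) compute its table index as (nctxs - 1).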

include/jemalloc/internal/mutex.h

@@ -28,7 +28,6 @@ extern bool isthreaded;
 #endif
 
 bool malloc_mutex_init(malloc_mutex_t *mutex);
-void malloc_mutex_destroy(malloc_mutex_t *mutex);
 void malloc_mutex_prefork(malloc_mutex_t *mutex);
 void malloc_mutex_postfork_parent(malloc_mutex_t *mutex);
 void malloc_mutex_postfork_child(malloc_mutex_t *mutex);

include/jemalloc/internal/prof.h

@@ -31,6 +31,12 @@ typedef struct prof_tdata_s prof_tdata_t;
 /* Size of stack-allocated buffer used by prof_printf(). */
 #define PROF_PRINTF_BUFSIZE 128
 
+/*
+ * Number of mutexes shared among all ctx's.  No space is allocated for these
+ * unless profiling is enabled, so it's okay to over-provision.
+ */
+#define PROF_NCTX_LOCKS 1024
+
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
@@ -108,7 +114,7 @@ struct prof_ctx_s {
 	prof_bt_t *bt;
 
 	/* Protects cnt_merged and cnts_ql. */
-	malloc_mutex_t lock;
+	malloc_mutex_t *lock;
 
 	/* Temporary storage for summation during dump. */
 	prof_cnt_t cnt_summed;
@@ -444,10 +450,10 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
 			 * It's too late to propagate OOM for this realloc(),
 			 * so operate directly on old_cnt->ctx->cnt_merged.
 			 */
-			malloc_mutex_lock(&old_ctx->lock);
+			malloc_mutex_lock(old_ctx->lock);
 			old_ctx->cnt_merged.curobjs--;
 			old_ctx->cnt_merged.curbytes -= old_size;
-			malloc_mutex_unlock(&old_ctx->lock);
+			malloc_mutex_unlock(old_ctx->lock);
 			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
 		}
 	} else
@@ -516,10 +522,10 @@ prof_free(const void *ptr, size_t size)
 			 * OOM during free() cannot be propagated, so operate
 			 * directly on cnt->ctx->cnt_merged.
 			 */
-			malloc_mutex_lock(&ctx->lock);
+			malloc_mutex_lock(ctx->lock);
 			ctx->cnt_merged.curobjs--;
 			ctx->cnt_merged.curbytes -= size;
-			malloc_mutex_unlock(&ctx->lock);
+			malloc_mutex_unlock(ctx->lock);
 		}
 	}
 }
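
A quick sanity check on the over-provisioning: the lock table is only materialized when profiling is enabled (see the base_alloc() call in prof_boot2() below), and at 1024 entries its fixed cost is PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), on the order of tens of kilobytes on typical 64-bit platforms where a pthread mutex occupies a few dozen bytes.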

src/jemalloc.c

@@ -638,11 +638,6 @@ malloc_init_hard(void)
 		return (true);
 	}
 
-	if (config_prof && prof_boot2()) {
-		malloc_mutex_unlock(&init_lock);
-		return (true);
-	}
-
 	if (arenas_tsd_boot()) {
 		malloc_mutex_unlock(&init_lock);
 		return (true);
@@ -653,6 +648,11 @@ malloc_init_hard(void)
 		return (true);
 	}
 
+	if (config_prof && prof_boot2()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
 	/* Get number of CPUs. */
 	malloc_initializer = pthread_self();
 	malloc_mutex_unlock(&init_lock);

src/mutex.c

@@ -81,18 +81,6 @@ malloc_mutex_init(malloc_mutex_t *mutex)
 	return (false);
 }
 
-void
-malloc_mutex_destroy(malloc_mutex_t *mutex)
-{
-
-#ifndef JEMALLOC_OSSPIN
-	if (pthread_mutex_destroy(mutex) != 0) {
-		malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n");
-		abort();
-	}
-#endif
-}
-
 void
 malloc_mutex_prefork(malloc_mutex_t *mutex)
 {

src/prof.c

@@ -28,6 +28,16 @@ char opt_prof_prefix[PATH_MAX + 1];
 uint64_t prof_interval;
 bool prof_promote;
 
+/*
+ * Table of mutexes that are shared among ctx's.  These are leaf locks, so
+ * there is no problem with using them for more than one ctx at the same time.
+ * The primary motivation for this sharing though is that ctx's are ephemeral,
+ * and destroying mutexes causes complications for systems that allocate when
+ * creating/destroying mutexes.
+ */
+static malloc_mutex_t *ctx_locks;
+static unsigned cum_ctxs; /* Atomic counter. */
+
 /*
  * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
  * structure that knows about all backtraces currently captured.
@@ -87,6 +97,7 @@ static void prof_fdump(void);
 static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
     size_t *hash2);
 static bool prof_bt_keycomp(const void *k1, const void *k2);
+static malloc_mutex_t *prof_ctx_mutex_choose(void);
 
 /******************************************************************************/
@@ -471,18 +482,12 @@ prof_lookup(prof_bt_t *bt)
 				return (NULL);
 			}
 			ctx.p->bt = btkey.p;
-			if (malloc_mutex_init(&ctx.p->lock)) {
-				prof_leave();
-				idalloc(btkey.v);
-				idalloc(ctx.v);
-				return (NULL);
-			}
+			ctx.p->lock = prof_ctx_mutex_choose();
 			memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
 			ql_new(&ctx.p->cnts_ql);
 			if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
 				/* OOM. */
 				prof_leave();
-				malloc_mutex_destroy(&ctx.p->lock);
 				idalloc(btkey.v);
 				idalloc(ctx.v);
 				return (NULL);
@@ -502,9 +507,9 @@ prof_lookup(prof_bt_t *bt)
 			 * Artificially raise curobjs, in order to avoid a race
 			 * condition with prof_ctx_merge()/prof_ctx_destroy().
 			 */
-			malloc_mutex_lock(&ctx.p->lock);
+			malloc_mutex_lock(ctx.p->lock);
 			ctx.p->cnt_merged.curobjs++;
-			malloc_mutex_unlock(&ctx.p->lock);
+			malloc_mutex_unlock(ctx.p->lock);
 			new_ctx = false;
 		}
 		prof_leave();
@@ -547,10 +552,10 @@ prof_lookup(prof_bt_t *bt)
 			return (NULL);
 		}
 		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
-		malloc_mutex_lock(&ctx.p->lock);
+		malloc_mutex_lock(ctx.p->lock);
 		ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
 		ctx.p->cnt_merged.curobjs--;
-		malloc_mutex_unlock(&ctx.p->lock);
+		malloc_mutex_unlock(ctx.p->lock);
 	} else {
 		/* Move ret to the front of the LRU. */
 		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
@@ -622,7 +627,7 @@ prof_printf(bool propagate_err, const char *format, ...)
 	char buf[PROF_PRINTF_BUFSIZE];
 
 	va_start(ap, format);
-	malloc_snprintf(buf, sizeof(buf), format, ap);
+	malloc_vsnprintf(buf, sizeof(buf), format, ap);
 	va_end(ap);
 
 	ret = prof_write(propagate_err, buf);
@@ -637,7 +642,7 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
 	cassert(config_prof);
 
-	malloc_mutex_lock(&ctx->lock);
+	malloc_mutex_lock(ctx->lock);
 	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
 	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
@@ -676,7 +681,7 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
 		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
 	}
 
-	malloc_mutex_unlock(&ctx->lock);
+	malloc_mutex_unlock(ctx->lock);
 }
 
 static void
@@ -693,7 +698,7 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 	 * prof_ctx_merge() and entry into this function.
 	 */
 	prof_enter();
-	malloc_mutex_lock(&ctx->lock);
+	malloc_mutex_lock(ctx->lock);
 	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
 		assert(ctx->cnt_merged.curbytes == 0);
 		assert(ctx->cnt_merged.accumobjs == 0);
@@ -703,9 +708,8 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 			assert(false);
 		prof_leave();
 		/* Destroy ctx. */
-		malloc_mutex_unlock(&ctx->lock);
+		malloc_mutex_unlock(ctx->lock);
 		bt_destroy(ctx->bt);
-		malloc_mutex_destroy(&ctx->lock);
 		idalloc(ctx);
 	} else {
 		/*
@@ -713,7 +717,7 @@ prof_ctx_destroy(prof_ctx_t *ctx)
 		 * prof_lookup().
 		 */
 		ctx->cnt_merged.curobjs--;
-		malloc_mutex_unlock(&ctx->lock);
+		malloc_mutex_unlock(ctx->lock);
 		prof_leave();
 	}
 }
@@ -726,7 +730,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
 	cassert(config_prof);
 
 	/* Merge cnt stats and detach from ctx. */
-	malloc_mutex_lock(&ctx->lock);
+	malloc_mutex_lock(ctx->lock);
 	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
 	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
 	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
@@ -751,7 +755,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
 		destroy = true;
 	} else
 		destroy = false;
-	malloc_mutex_unlock(&ctx->lock);
+	malloc_mutex_unlock(ctx->lock);
 	if (destroy)
 		prof_ctx_destroy(ctx);
 }
@@ -1067,6 +1071,14 @@ prof_bt_keycomp(const void *k1, const void *k2)
 	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
 }
 
+static malloc_mutex_t *
+prof_ctx_mutex_choose(void)
+{
+	unsigned nctxs = atomic_add_u(&cum_ctxs, 1);
+
+	return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
+}
+
 prof_tdata_t *
 prof_tdata_init(void)
 {
@@ -1177,6 +1189,8 @@ prof_boot2(void)
 	cassert(config_prof);
 
 	if (opt_prof) {
+		unsigned i;
+
 		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
 		    prof_bt_keycomp))
 			return (true);
@@ -1202,6 +1216,15 @@ prof_boot2(void)
 			if (opt_abort)
 				abort();
 		}
+
+		ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
+		    sizeof(malloc_mutex_t));
+		if (ctx_locks == NULL)
+			return (true);
+		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
+			if (malloc_mutex_init(&ctx_locks[i]))
+				return (true);
+		}
 	}
 
 #ifdef JEMALLOC_PROF_LIBGCC