From 6da5418ded9170b087c35960e0010006430117c1 Mon Sep 17 00:00:00 2001
From: Jason Evans
Date: Fri, 23 Mar 2012 18:05:51 -0700
Subject: [PATCH] Remove ephemeral mutexes.

Remove ephemeral mutexes from the prof machinery, and remove
malloc_mutex_destroy().  This simplifies mutex management on systems
that call malloc()/free() inside pthread_mutex_{init,destroy}().

Add atomic_*_u() for operations on unsigned values.

Fix prof_printf() to call malloc_vsnprintf() rather than
malloc_snprintf().
---
 include/jemalloc/internal/atomic.h | 30 ++++++++++++++
 include/jemalloc/internal/mutex.h  |  1 -
 include/jemalloc/internal/prof.h   | 16 +++++---
 src/jemalloc.c                     | 10 ++---
 src/mutex.c                        | 12 ------
 src/prof.c                         | 63 ++++++++++++++++++++----------
 6 files changed, 89 insertions(+), 43 deletions(-)

diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h
index afeb9cb7..7a9cb61e 100644
--- a/include/jemalloc/internal/atomic.h
+++ b/include/jemalloc/internal/atomic.h
@@ -12,6 +12,7 @@
 #define atomic_read_uint64(p) atomic_add_uint64(p, 0)
 #define atomic_read_uint32(p) atomic_add_uint32(p, 0)
 #define atomic_read_z(p) atomic_add_z(p, 0)
+#define atomic_read_u(p) atomic_add_u(p, 0)
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
@@ -24,6 +25,8 @@ uint32_t atomic_add_uint32(uint32_t *p, uint32_t x);
 uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x);
 size_t atomic_add_z(size_t *p, size_t x);
 size_t atomic_sub_z(size_t *p, size_t x);
+unsigned atomic_add_u(unsigned *p, unsigned x);
+unsigned atomic_sub_u(unsigned *p, unsigned x);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
@@ -192,6 +195,33 @@ atomic_sub_z(size_t *p, size_t x)
     (uint32_t)-((int32_t)x)));
 #endif
 }
+
+/******************************************************************************/
+/* unsigned operations. */
+JEMALLOC_INLINE unsigned
+atomic_add_u(unsigned *p, unsigned x)
+{
+
+#if (LG_SIZEOF_INT == 3)
+    return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
+#elif (LG_SIZEOF_INT == 2)
+    return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
+#endif
+}
+
+JEMALLOC_INLINE unsigned
+atomic_sub_u(unsigned *p, unsigned x)
+{
+
+#if (LG_SIZEOF_INT == 3)
+    return ((unsigned)atomic_add_uint64((uint64_t *)p,
+        (uint64_t)-((int64_t)x)));
+#elif (LG_SIZEOF_INT == 2)
+    return ((unsigned)atomic_add_uint32((uint32_t *)p,
+        (uint32_t)-((int32_t)x)));
+#endif
+}
+/******************************************************************************/
 
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
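[Note: the hand-rolled LG_SIZEOF_INT dispatch above predates C11 atomics. A C11 sketch of the intended semantics, not part of the patch: both operations return the value of *p after the update, and atomic_sub_u() is implemented as addition of the two's-complement negation.]

    #include <assert.h>
    #include <stdatomic.h>

    static _Atomic unsigned counter;

    int
    main(void)
    {
        /* atomic_fetch_add() returns the OLD value; the jemalloc API
         * returns the NEW value, so add x back in. */
        unsigned after_add = atomic_fetch_add(&counter, 5u) + 5u;
        assert(after_add == 5);

        /* Subtraction as addition of the two's-complement negation,
         * the same trick atomic_sub_u() uses. */
        unsigned after_sub = atomic_fetch_add(&counter, -2u) - 2u;
        assert(after_sub == 3);

        return (0);
    }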
diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h
index 9d136585..10637e92 100644
--- a/include/jemalloc/internal/mutex.h
+++ b/include/jemalloc/internal/mutex.h
@@ -28,7 +28,6 @@ extern bool isthreaded;
 #endif
 
 bool malloc_mutex_init(malloc_mutex_t *mutex);
-void malloc_mutex_destroy(malloc_mutex_t *mutex);
 void malloc_mutex_prefork(malloc_mutex_t *mutex);
 void malloc_mutex_postfork_parent(malloc_mutex_t *mutex);
 void malloc_mutex_postfork_child(malloc_mutex_t *mutex);
diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h
index 231a3876..34929e7e 100644
--- a/include/jemalloc/internal/prof.h
+++ b/include/jemalloc/internal/prof.h
@@ -31,6 +31,12 @@ typedef struct prof_tdata_s prof_tdata_t;
 /* Size of stack-allocated buffer used by prof_printf(). */
 #define PROF_PRINTF_BUFSIZE 128
 
+/*
+ * Number of mutexes shared among all ctx's.  No space is allocated for these
+ * unless profiling is enabled, so it's okay to over-provision.
+ */
+#define PROF_NCTX_LOCKS 1024
+
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
@@ -108,7 +114,7 @@ struct prof_ctx_s {
     prof_bt_t *bt;
 
     /* Protects cnt_merged and cnts_ql. */
-    malloc_mutex_t lock;
+    malloc_mutex_t *lock;
 
     /* Temporary storage for summation during dump. */
     prof_cnt_t cnt_summed;
@@ -444,10 +450,10 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
              * It's too late to propagate OOM for this realloc(),
              * so operate directly on old_cnt->ctx->cnt_merged.
              */
-            malloc_mutex_lock(&old_ctx->lock);
+            malloc_mutex_lock(old_ctx->lock);
             old_ctx->cnt_merged.curobjs--;
             old_ctx->cnt_merged.curbytes -= old_size;
-            malloc_mutex_unlock(&old_ctx->lock);
+            malloc_mutex_unlock(old_ctx->lock);
             told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
         }
     } else
@@ -516,10 +522,10 @@ prof_free(const void *ptr, size_t size)
              * OOM during free() cannot be propagated, so operate
              * directly on cnt->ctx->cnt_merged.
              */
-            malloc_mutex_lock(&ctx->lock);
+            malloc_mutex_lock(ctx->lock);
             ctx->cnt_merged.curobjs--;
             ctx->cnt_merged.curbytes -= size;
-            malloc_mutex_unlock(&ctx->lock);
+            malloc_mutex_unlock(ctx->lock);
         }
     }
 }
diff --git a/src/jemalloc.c b/src/jemalloc.c
index f9451796..b3e898c1 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -638,11 +638,6 @@ malloc_init_hard(void)
         return (true);
     }
 
-    if (config_prof && prof_boot2()) {
-        malloc_mutex_unlock(&init_lock);
-        return (true);
-    }
-
     if (arenas_tsd_boot()) {
         malloc_mutex_unlock(&init_lock);
         return (true);
@@ -653,6 +648,11 @@ malloc_init_hard(void)
         return (true);
     }
 
+    if (config_prof && prof_boot2()) {
+        malloc_mutex_unlock(&init_lock);
+        return (true);
+    }
+
     /* Get number of CPUs. */
     malloc_initializer = pthread_self();
     malloc_mutex_unlock(&init_lock);
diff --git a/src/mutex.c b/src/mutex.c
index 243b7129..07d2a033 100644
--- a/src/mutex.c
+++ b/src/mutex.c
@@ -81,18 +81,6 @@ malloc_mutex_init(malloc_mutex_t *mutex)
     return (false);
 }
 
-void
-malloc_mutex_destroy(malloc_mutex_t *mutex)
-{
-
-#ifndef JEMALLOC_OSSPIN
-    if (pthread_mutex_destroy(mutex) != 0) {
-        malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n");
-        abort();
-    }
-#endif
-}
-
 void
 malloc_mutex_prefork(malloc_mutex_t *mutex)
 {
diff --git a/src/prof.c b/src/prof.c
index ba0b64e1..bc21d894 100644
--- a/src/prof.c
+++ b/src/prof.c
@@ -28,6 +28,16 @@ char opt_prof_prefix[PATH_MAX + 1];
 uint64_t prof_interval;
 bool prof_promote;
 
+/*
+ * Table of mutexes that are shared among ctx's.  These are leaf locks, so
+ * there is no problem with using them for more than one ctx at the same time.
+ * The primary motivation for this sharing though is that ctx's are ephemeral,
+ * and destroying mutexes causes complications for systems that allocate when
+ * creating/destroying mutexes.
+ */
+static malloc_mutex_t *ctx_locks;
+static unsigned cum_ctxs; /* Atomic counter. */
+
 /*
  * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
  * structure that knows about all backtraces currently captured.
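[Note: the prof.c hunk above is the heart of the change: instead of embedding a mutex in each ephemeral ctx, every ctx borrows one of PROF_NCTX_LOCKS permanently initialized leaf locks (lock striping), so no mutex is ever destroyed. A minimal standalone sketch of the scheme, using POSIX threads and a GCC __sync builtin in place of jemalloc's internal malloc_mutex_t and atomic_add_u(); the *_demo names are illustrative, not jemalloc API.]

    #include <pthread.h>
    #include <stdbool.h>

    #define NCTX_LOCKS 1024 /* mirrors PROF_NCTX_LOCKS */

    static pthread_mutex_t ctx_locks_demo[NCTX_LOCKS];
    static unsigned cum_ctxs_demo; /* atomic counter */

    /* One-time setup, analogous to the loop added to prof_boot2() below. */
    static bool
    ctx_locks_boot_demo(void)
    {
        unsigned i;

        for (i = 0; i < NCTX_LOCKS; i++) {
            if (pthread_mutex_init(&ctx_locks_demo[i], NULL) != 0)
                return (true); /* error */
        }
        return (false);
    }

    /* Hand out the shared locks round-robin, as prof_ctx_mutex_choose()
     * does below; __sync_add_and_fetch() returns the incremented value,
     * matching atomic_add_u(). */
    static pthread_mutex_t *
    ctx_lock_choose_demo(void)
    {
        unsigned nctxs = __sync_add_and_fetch(&cum_ctxs_demo, 1);

        return (&ctx_locks_demo[(nctxs - 1) % NCTX_LOCKS]);
    }

[Sharing is safe because these are leaf locks, and since the table lives for the process lifetime, platforms whose pthread_mutex_destroy() allocates or frees memory can no longer re-enter the allocator during mutex teardown.]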
@@ -87,6 +97,7 @@ static void prof_fdump(void);
 static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
     size_t *hash2);
 static bool prof_bt_keycomp(const void *k1, const void *k2);
+static malloc_mutex_t *prof_ctx_mutex_choose(void);
 
 /******************************************************************************/
 
@@ -471,18 +482,12 @@ prof_lookup(prof_bt_t *bt)
             return (NULL);
         }
         ctx.p->bt = btkey.p;
-        if (malloc_mutex_init(&ctx.p->lock)) {
-            prof_leave();
-            idalloc(btkey.v);
-            idalloc(ctx.v);
-            return (NULL);
-        }
+        ctx.p->lock = prof_ctx_mutex_choose();
         memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
         ql_new(&ctx.p->cnts_ql);
         if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
             /* OOM. */
             prof_leave();
-            malloc_mutex_destroy(&ctx.p->lock);
             idalloc(btkey.v);
             idalloc(ctx.v);
             return (NULL);
@@ -502,9 +507,9 @@ prof_lookup(prof_bt_t *bt)
              * Artificially raise curobjs, in order to avoid a race
              * condition with prof_ctx_merge()/prof_ctx_destroy().
              */
-            malloc_mutex_lock(&ctx.p->lock);
+            malloc_mutex_lock(ctx.p->lock);
             ctx.p->cnt_merged.curobjs++;
-            malloc_mutex_unlock(&ctx.p->lock);
+            malloc_mutex_unlock(ctx.p->lock);
             new_ctx = false;
         }
         prof_leave();
@@ -547,10 +552,10 @@ prof_lookup(prof_bt_t *bt)
             return (NULL);
         }
         ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
-        malloc_mutex_lock(&ctx.p->lock);
+        malloc_mutex_lock(ctx.p->lock);
         ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
         ctx.p->cnt_merged.curobjs--;
-        malloc_mutex_unlock(&ctx.p->lock);
+        malloc_mutex_unlock(ctx.p->lock);
     } else {
         /* Move ret to the front of the LRU. */
         ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
@@ -622,7 +627,7 @@ prof_printf(bool propagate_err, const char *format, ...)
     char buf[PROF_PRINTF_BUFSIZE];
 
     va_start(ap, format);
-    malloc_snprintf(buf, sizeof(buf), format, ap);
+    malloc_vsnprintf(buf, sizeof(buf), format, ap);
     va_end(ap);
     ret = prof_write(propagate_err, buf);
 
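[Note: the prof_printf() hunk just above fixes a classic varargs bug: a va_list must be consumed by a v-variant function. Passing ap to the variadic malloc_snprintf() makes the va_list object itself a single (meaningless) conversion argument, which is undefined behavior. The same bug class, sketched with the standard stdio pair rather than jemalloc's malloc_snprintf()/malloc_vsnprintf(); log_printf is an illustrative name, not jemalloc API.]

    #include <stdarg.h>
    #include <stdio.h>

    static void
    log_printf(const char *format, ...)
    {
        char buf[128];
        va_list ap;

        va_start(ap, format);
        /*
         * Wrong: snprintf(buf, sizeof(buf), format, ap);
         * That passes ap itself as one ordinary argument -- undefined
         * behavior.  The v-variant consumes the va_list instead:
         */
        vsnprintf(buf, sizeof(buf), format, ap);
        va_end(ap);

        fputs(buf, stderr);
    }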
@@ -637,7 +642,7 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
 
     cassert(config_prof);
 
-    malloc_mutex_lock(&ctx->lock);
+    malloc_mutex_lock(ctx->lock);
 
     memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
     ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
@@ -676,7 +681,7 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
         cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
     }
 
-    malloc_mutex_unlock(&ctx->lock);
+    malloc_mutex_unlock(ctx->lock);
 }
 
 static void
@@ -693,7 +698,7 @@ prof_ctx_destroy(prof_ctx_t *ctx)
      * prof_ctx_merge() and entry into this function.
      */
     prof_enter();
-    malloc_mutex_lock(&ctx->lock);
+    malloc_mutex_lock(ctx->lock);
     if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
         assert(ctx->cnt_merged.curbytes == 0);
         assert(ctx->cnt_merged.accumobjs == 0);
@@ -703,9 +708,8 @@ prof_ctx_destroy(prof_ctx_t *ctx)
             assert(false);
         prof_leave();
         /* Destroy ctx. */
-        malloc_mutex_unlock(&ctx->lock);
+        malloc_mutex_unlock(ctx->lock);
         bt_destroy(ctx->bt);
-        malloc_mutex_destroy(&ctx->lock);
         idalloc(ctx);
     } else {
         /*
@@ -713,7 +717,7 @@ prof_ctx_destroy(prof_ctx_t *ctx)
          * prof_lookup().
          */
         ctx->cnt_merged.curobjs--;
-        malloc_mutex_unlock(&ctx->lock);
+        malloc_mutex_unlock(ctx->lock);
         prof_leave();
     }
 }
@@ -726,7 +730,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
     cassert(config_prof);
 
     /* Merge cnt stats and detach from ctx. */
-    malloc_mutex_lock(&ctx->lock);
+    malloc_mutex_lock(ctx->lock);
     ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
     ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
     ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
@@ -751,7 +755,7 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
         destroy = true;
     } else
         destroy = false;
-    malloc_mutex_unlock(&ctx->lock);
+    malloc_mutex_unlock(ctx->lock);
     if (destroy)
         prof_ctx_destroy(ctx);
 }
@@ -1067,6 +1071,14 @@ prof_bt_keycomp(const void *k1, const void *k2)
     return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
 }
 
+static malloc_mutex_t *
+prof_ctx_mutex_choose(void)
+{
+    unsigned nctxs = atomic_add_u(&cum_ctxs, 1);
+
+    return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
+}
+
 prof_tdata_t *
 prof_tdata_init(void)
 {
@@ -1177,6 +1189,8 @@ prof_boot2(void)
     cassert(config_prof);
 
     if (opt_prof) {
+        unsigned i;
+
         if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
             prof_bt_keycomp))
             return (true);
@@ -1202,6 +1216,15 @@ prof_boot2(void)
             if (opt_abort)
                 abort();
         }
+
+        ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
+            sizeof(malloc_mutex_t));
+        if (ctx_locks == NULL)
+            return (true);
+        for (i = 0; i < PROF_NCTX_LOCKS; i++) {
+            if (malloc_mutex_init(&ctx_locks[i]))
+                return (true);
+        }
     }
 
 #ifdef JEMALLOC_PROF_LIBGCC
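[Note: prof_ctx_mutex_choose() deliberately lets cum_ctxs overflow. Because PROF_NCTX_LOCKS is a power of two, it divides UINT_MAX + 1 evenly, so the (nctxs - 1) % PROF_NCTX_LOCKS sequence stays uniformly distributed across the wrap with no seam. A worked check, assuming 32-bit unsigned (LG_SIZEOF_INT == 2); the same argument holds at any width.]

    #include <assert.h>
    #include <limits.h>

    int
    main(void)
    {
        /* Indices handed out just before, at, and after cum_ctxs wraps: */
        assert((UINT_MAX - 1) % 1024 == 1022); /* nctxs == UINT_MAX    */
        assert((0u - 1u) % 1024 == 1023);      /* nctxs == 0 (wrapped) */
        assert((1u - 1u) % 1024 == 0);         /* nctxs == 1           */
        return (0);
    }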