#define	JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"
#ifdef JEMALLOC_TCACHE
/******************************************************************************/
/* Data. */

/*
 * When true, tcache_boot() computes tcache_gc_incr and creates the TSD key
 * used for per-thread cleanup; when false, tcache_boot() does nothing.
 */
bool	opt_tcache = true;
/*
 * Base-2 logarithm of the count that tcache_boot() spreads across the bins to
 * derive tcache_gc_incr.  A negative value makes tcache_boot() set
 * tcache_gc_incr to 0.
 */
ssize_t	opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;

/*
 * Per-thread pointer to the calling thread's cache.  tcache_create() stores
 * the new cache here; tcache_thread_cleanup() replaces it with the sentinel
 * value (void *)1 once the cache has been destroyed.
 */
__thread tcache_t	*tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));

/*
 * Holds the same pointer as tcache_tls, but as a pthread TSD key registered
 * with tcache_thread_cleanup() as its destructor, so that the cache can be
 * cleaned up when a thread exits.
 */
static pthread_key_t		tcache_tsd;

/*
 * Incremental GC event threshold, computed by tcache_boot() as
 * ceil(2^opt_lg_tcache_gc_sweep / nbins), or 0 if opt_lg_tcache_gc_sweep is
 * negative.
 */
unsigned			tcache_gc_incr;

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

/* TSD destructor registered for tcache_tsd by tcache_boot(). */
static void	tcache_thread_cleanup(void *arg);

/******************************************************************************/

/*
 * Slow-path allocation: ask the owning arena to refill tbin via
 * arena_tcache_fill(), then return whatever tcache_bin_alloc() yields from the
 * refilled bin.
 *
 * With JEMALLOC_PROF, the cache's accumulated byte count is passed along to
 * arena_tcache_fill() and then reset.
 */
void *
tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
{

#ifdef JEMALLOC_PROF
	arena_tcache_fill(tcache->arena, tbin, binind,
	    tcache->prof_accumbytes);
	/* The count was just passed to arena_tcache_fill(); start over. */
	tcache->prof_accumbytes = 0;
#else
	arena_tcache_fill(tcache->arena, tbin, binind);
#endif

	return (tcache_bin_alloc(tbin));
}

/*
 * Return cached objects in tbin to the arenas that own them, until only rem
 * objects remain cached.
 *
 * tbin->avail is a singly linked list threaded through the first word of each
 * cached object.  The first (ncached - rem) objects on the list are flushed;
 * the remainder of the list becomes the new tbin->avail.
 *
 * Objects may belong to different arenas.  Each pass locks the arena that owns
 * the first pending object, frees every pending object owned by that arena,
 * and chains the others onto a deferred list that is processed by the next
 * pass.
 *
 * Parameters:
 *   tbin   - cache bin to flush.
 *   binind - index of tbin, used to locate the matching arena bin for stats.
 *   rem    - number of objects to leave cached; must not exceed tbin->ncached.
 *   tcache - (JEMALLOC_PROF only) owning cache, whose prof_accumbytes is
 *            handed to its arena while that arena's lock is held.
 */
void
tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
#ifdef JEMALLOC_PROF
    , tcache_t *tcache
#endif
    )
{
	void *flush, *deferred, *ptr;
	unsigned i, nflush, ndeferred;
	bool first_pass;

	/* nflush is unsigned; rem > ncached would wrap to a huge count. */
	assert(rem <= tbin->ncached);

	first_pass = true;
	for (flush = tbin->avail, nflush = tbin->ncached - rem; flush != NULL;
	    flush = deferred, nflush = ndeferred) {
		/* Lock the arena associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
		arena_t *arena = chunk->arena;
		malloc_mutex_lock(&arena->lock);
#ifdef JEMALLOC_PROF
		if (arena == tcache->arena) {
			arena_prof_accum(arena, tcache->prof_accumbytes);
			tcache->prof_accumbytes = 0;
		}
#endif
		deferred = NULL;
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = flush;
			assert(ptr != NULL);
			/* Advance before ptr's link word is reused or freed. */
			flush = *(void **)ptr;
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena) {
				size_t pageind = (((uintptr_t)ptr -
				    (uintptr_t)chunk) >> PAGE_SHIFT);
				arena_chunk_map_t *mapelm =
				    &chunk->map[pageind];
				arena_dalloc_bin(arena, chunk, ptr, mapelm);
			} else {
				/*
				 * This object was allocated via a different
				 * arena than the one that is currently locked.
				 * Stash the object, so that it can be handled
				 * in a future pass.
				 */
				*(void **)ptr = deferred;
				deferred = ptr;
				ndeferred++;
			}
		}
#ifdef JEMALLOC_STATS
		arena->bins[binind].stats.nflushes++;
		{
			/*
			 * Merge the bin's request count into whichever arena
			 * happens to be locked; it is zeroed afterwards, so
			 * later passes add nothing.
			 */
			arena_bin_t *bin = &arena->bins[binind];
			bin->stats.nrequests += tbin->tstats.nrequests;
			if (bin->reg_size <= small_maxclass) {
				arena->stats.nmalloc_small +=
				    tbin->tstats.nrequests;
			} else {
				arena->stats.nmalloc_medium +=
				    tbin->tstats.nrequests;
			}
			tbin->tstats.nrequests = 0;
		}
#endif
		malloc_mutex_unlock(&arena->lock);

		if (first_pass) {
			/*
			 * After the first pass, flush points at the rem
			 * objects that stay cached, or is NULL if none do.
			 * Store it unconditionally: leaving the old head in
			 * place when everything was flushed would keep a
			 * pointer to an already-freed object, which the next
			 * flush of this bin would dereference.
			 */
			tbin->avail = flush;
			first_pass = false;
		}
	}

	tbin->ncached = rem;
}

/*
 * Allocate a thread cache associated with arena, set each bin's ncached_max,
 * and install the cache as the calling thread's cache (both in tcache_tls and
 * in the tcache_tsd key, so that the TSD destructor runs at thread exit).
 *
 * Returns the new cache, or NULL if the allocation fails.
 */
tcache_t *
tcache_create(arena_t *arena)
{
	tcache_t *tcache;
	size_t nbytes;
	unsigned binind;

	/* One tcache_t plus (nbins - 1) additional bin structures. */
	nbytes = sizeof(tcache_t) + (sizeof(tcache_bin_t) * (nbins - 1));
	/*
	 * Pad to a whole number of cachelines so that the cache never shares a
	 * cacheline with an unrelated object (false sharing).
	 *
	 * That this works relies on the same logic as in ipalloc().
	 */
	nbytes = (nbytes + CACHELINE_MASK) & (-CACHELINE);

	/* Pick the allocation path that matches the padded size. */
	if (nbytes > small_maxclass) {
		if (nbytes > bin_maxclass)
			tcache = (tcache_t *)icalloc(nbytes);
		else {
			tcache = (tcache_t *)arena_malloc_medium(arena, nbytes,
			    true);
		}
	} else
		tcache = (tcache_t *)arena_malloc_small(arena, nbytes, true);

	if (tcache == NULL)
		return (NULL);

#ifdef JEMALLOC_STATS
	/* Append to the arena's list of extant tcaches, under its lock. */
	malloc_mutex_lock(&arena->lock);
	ql_elm_new(tcache, link);
	ql_tail_insert(&arena->tcache_ql, tcache, link);
	malloc_mutex_unlock(&arena->lock);
#endif

	tcache->arena = arena;
	assert((TCACHE_NSLOTS_MAX & 1U) == 0);
	/* Each bin may cache up to twice nregs, capped at TCACHE_NSLOTS_MAX. */
	for (binind = 0; binind < nbins; binind++) {
		tcache_bin_t *tbin = &tcache->tbins[binind];
		arena_bin_t *bin = &arena->bins[binind];

		if ((bin->nregs << 1) > TCACHE_NSLOTS_MAX)
			tbin->ncached_max = TCACHE_NSLOTS_MAX;
		else
			tbin->ncached_max = (bin->nregs << 1);
	}

	/* Publish for fast access and for cleanup at thread exit. */
	tcache_tls = tcache;
	pthread_setspecific(tcache_tsd, tcache);

	return (tcache);
}

/*
 * Tear down tcache: unlink it from its arena (stats builds), flush every bin
 * completely, hand any outstanding stats/profiling counts to the arena, and
 * finally free the memory that holds the cache itself.
 */
void
tcache_destroy(tcache_t *tcache)
{
	unsigned binind;

#ifdef JEMALLOC_STATS
	{
		arena_t *owner = tcache->arena;

		/* Leave the list of extant tcaches and merge final stats. */
		malloc_mutex_lock(&owner->lock);
		ql_remove(&owner->tcache_ql, tcache, link);
		tcache_stats_merge(tcache, owner);
		malloc_mutex_unlock(&owner->lock);
	}
#endif

	for (binind = 0; binind < nbins; binind++) {
		tcache_bin_t *tbin = &tcache->tbins[binind];

		/* rem == 0: nothing stays cached. */
#ifdef JEMALLOC_PROF
		tcache_bin_flush(tbin, binind, 0, tcache);
#else
		tcache_bin_flush(tbin, binind, 0);
#endif

#ifdef JEMALLOC_STATS
		/* Account for any request count the flush did not merge. */
		if (tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			arena_bin_t *bin = &arena->bins[binind];

			malloc_mutex_lock(&arena->lock);
			bin->stats.nrequests += tbin->tstats.nrequests;
			if (bin->reg_size > small_maxclass) {
				arena->stats.nmalloc_medium +=
				    tbin->tstats.nrequests;
			} else {
				arena->stats.nmalloc_small +=
				    tbin->tstats.nrequests;
			}
			malloc_mutex_unlock(&arena->lock);
		}
#endif
	}

#ifdef JEMALLOC_PROF
	/* Hand over any profiling bytes that are still pending. */
	if (tcache->prof_accumbytes > 0) {
		malloc_mutex_lock(&tcache->arena->lock);
		arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
		malloc_mutex_unlock(&tcache->arena->lock);
	}
#endif

	/*
	 * Release the cache's own storage.  Bin-sized allocations go straight
	 * to arena_dalloc_bin() on the arena recorded in the chunk header;
	 * anything larger is released through idalloc().
	 */
	if (arena_salloc(tcache) > bin_maxclass)
		idalloc(tcache);
	else {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;
		size_t pageind = (((uintptr_t)tcache - (uintptr_t)chunk) >>
		    PAGE_SHIFT);
		arena_chunk_map_t *mapelm = &chunk->map[pageind];

		malloc_mutex_lock(&arena->lock);
		arena_dalloc_bin(arena, chunk, tcache, mapelm);
		malloc_mutex_unlock(&arena->lock);
	}
}

/*
 * Destructor registered for the tcache_tsd key.  arg is the value stored under
 * the key, i.e. the exiting thread's cache.
 *
 * Destroys the cache and then leaves the sentinel (void *)1 in tcache_tls;
 * presumably this lets other code distinguish "already torn down" from "never
 * created" -- confirm against the readers of tcache_tls.
 */
static void
tcache_thread_cleanup(void *arg)
{
	tcache_t *tcache = (tcache_t *)arg;

	assert(tcache == tcache_tls);
	if (tcache == NULL)
		return;

	/* The sentinel must never have been stored under the TSD key. */
	assert(tcache != (void *)(uintptr_t)1);
	tcache_destroy(tcache);
	tcache_tls = (void *)(uintptr_t)1;
}

#ifdef JEMALLOC_STATS
/*
 * Fold tcache's per-bin request counters into arena and zero them.
 *
 * Bins with index below mbin0 are credited to arena->stats.nmalloc_small; the
 * remaining bins up to nbins are credited to arena->stats.nmalloc_medium.
 * The function takes no lock itself; the caller in this file holds
 * arena->lock around the call.
 */
void
tcache_stats_merge(tcache_t *tcache, arena_t *arena)
{
	unsigned binind = 0;

	/* Bins counted as small. */
	while (binind < mbin0) {
		tcache_bin_t *tbin = &tcache->tbins[binind];
		arena_bin_t *bin = &arena->bins[binind];

		bin->stats.nrequests += tbin->tstats.nrequests;
		arena->stats.nmalloc_small += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		binind++;
	}

	/* Remaining bins, counted as medium. */
	while (binind < nbins) {
		tcache_bin_t *tbin = &tcache->tbins[binind];
		arena_bin_t *bin = &arena->bins[binind];

		bin->stats.nrequests += tbin->tstats.nrequests;
		arena->stats.nmalloc_medium += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		binind++;
	}
}
#endif

void
tcache_boot(void)
{

	if (opt_tcache) {
		/* Compute incremental GC event threshold. */
		if (opt_lg_tcache_gc_sweep >= 0) {
			tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
			    nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
			    0) ? 0 : 1);
		} else
			tcache_gc_incr = 0;

		if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
		    0) {
			malloc_write(
			    "<jemalloc>: Error in pthread_key_create()\n");
			abort();
		}
	}
}
/******************************************************************************/
#endif /* JEMALLOC_TCACHE */