server-skynet-source-3rd-je.../src/tsd.c

#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/san.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"

/******************************************************************************/
/* Data. */

/* TSD_INITIALIZER triggers "-Wmissing-field-initializer" */
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS

#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd.  So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
	ql_head(tsd_init_block_t) blocks;
	malloc_mutex_t lock;
};

pthread_key_t tsd_tsd;
tsd_init_head_t	tsd_init_head = {
	ql_head_initializer(blocks),
	MALLOC_MUTEX_INITIALIZER
};

tsd_wrapper_t tsd_boot_wrapper = {
	false,
	TSD_INITIALIZER
};
bool tsd_booted = false;
#endif

JEMALLOC_DIAGNOSTIC_POP

/******************************************************************************/

/* A list of all the tsds in the nominal state. */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
static malloc_mutex_t tsd_nominal_tsds_lock;

/* How many slow-path-enabling features are turned on. */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);

static bool
tsd_in_nominal_list(tsd_t *tsd) {
	tsd_t *tsd_list;
	bool found = false;
	/*
	 * We don't know that tsd is nominal; it might not be safe to get data
	 * out of it here.
	 */
	malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
	ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		if (tsd == tsd_list) {
			found = true;
			break;
		}
	}
	malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock);
	return found;
}

static void
tsd_add_nominal(tsd_t *tsd) {
	assert(!tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	ql_elm_new(tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

static void
tsd_remove_nominal(tsd_t *tsd) {
	assert(tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

static void
tsd_force_recompute(tsdn_t *tsdn) {
	/*
	 * The stores to tsd->state here need to synchronize with the exchange
	 * in tsd_slow_update.
	 */
	atomic_fence(ATOMIC_RELEASE);
	malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
	tsd_t *remote_tsd;
	ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
		    <= tsd_state_nominal_max);
		tsd_atomic_store(&remote_tsd->state,
		    tsd_state_nominal_recompute, ATOMIC_RELAXED);
		/* See comments in te_recompute_fast_threshold(). */
		atomic_fence(ATOMIC_SEQ_CST);
		te_next_event_fast_set_non_nominal(remote_tsd);
	}
	malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}

void
tsd_global_slow_inc(tsdn_t *tsdn) {
	atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/*
	 * We unconditionally force a recompute, even if the global slow count
	 * was already positive.  If we didn't, then it would be possible for us
	 * to return to the user, have the user synchronize externally with some
	 * other thread, and then have that other thread not have picked up the
	 * update yet (since the original incrementing thread might still be
	 * making its way through the tsd list).
	 */
	tsd_force_recompute(tsdn);
}

void tsd_global_slow_dec(tsdn_t *tsdn) {
	atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/* See the note in ..._inc(). */
	tsd_force_recompute(tsdn);
}

static bool
tsd_local_slow(tsd_t *tsd) {
	return !tsd_tcache_enabled_get(tsd)
	    || tsd_reentrancy_level_get(tsd) > 0;
}

bool
tsd_global_slow() {
	return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
}

/******************************************************************************/

static uint8_t
tsd_state_compute(tsd_t *tsd) {
	if (!tsd_nominal(tsd)) {
		return tsd_state_get(tsd);
	}
	/* We're in *a* nominal state; but which one? */
	if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) {
		return tsd_state_nominal_slow;
	} else {
		return tsd_state_nominal;
	}
}

void
tsd_slow_update(tsd_t *tsd) {
	uint8_t old_state;
	do {
		uint8_t new_state = tsd_state_compute(tsd);
		old_state = tsd_atomic_exchange(&tsd->state, new_state,
		    ATOMIC_ACQUIRE);
	} while (old_state == tsd_state_nominal_recompute);

	te_recompute_fast_threshold(tsd);
}

void
tsd_state_set(tsd_t *tsd, uint8_t new_state) {
	/* Only the tsd module can change the state *to* recompute. */
	assert(new_state != tsd_state_nominal_recompute);
	uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED);
	if (old_state > tsd_state_nominal_max) {
		/*
		 * Not currently in the nominal list, but it might need to be
		 * inserted there.
		 */
		assert(!tsd_in_nominal_list(tsd));
		tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED);
		if (new_state <= tsd_state_nominal_max) {
			tsd_add_nominal(tsd);
		}
	} else {
		/*
		 * We're currently nominal.  If the new state is non-nominal,
		 * great; we take ourselves off the list and just enter the new
		 * state.
		 */
		assert(tsd_in_nominal_list(tsd));
		if (new_state > tsd_state_nominal_max) {
			tsd_remove_nominal(tsd);
			tsd_atomic_store(&tsd->state, new_state,
			    ATOMIC_RELAXED);
		} else {
			/*
			 * This is the tricky case.  We're transitioning from
			 * one nominal state to another.  The caller can't know
			 * about any races that are occurring at the same time,
			 * so we always have to recompute no matter what.
			 */
			tsd_slow_update(tsd);
		}
	}
	te_recompute_fast_threshold(tsd);
}

static void
tsd_prng_state_init(tsd_t *tsd) {
	/*
	 * A nondeterministic seed based on the address of tsd reduces
	 * the likelihood of lockstep non-uniform cache index
	 * utilization among identical concurrent processes, but at the
	 * cost of test repeatability.  For debug builds, instead use a
	 * deterministic seed.
	 */
	*tsd_prng_statep_get(tsd) = config_debug ? 0 :
	    (uint64_t)(uintptr_t)tsd;
}

static bool
tsd_data_init(tsd_t *tsd) {
	/*
	 * We initialize the rtree context first (before the tcache), since the
	 * tcache initialization depends on it.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	return tsd_tcache_enabled_data_init(tsd);
}

static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
	assert(!tsd_nominal(tsd));
	assert(!tsd_in_nominal_list(tsd));
	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}

static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
	assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
	    tsd_state_get(tsd) == tsd_state_minimal_initialized);
	/*
	 * During reincarnation, there is no guarantee that the cleanup function
	 * will be called (deallocation may happen after all tsd destructors).
	 * We set up tsd in a way that no cleanup is needed.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
	*tsd_reentrancy_levelp_get(tsd) = 1;
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	assert_tsd_data_cleanup_done(tsd);

	return false;
}

tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
	assert(!tsd_fast(tsd));

	if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
		/*
		 * On slow path but no work needed.  Note that we can't
		 * necessarily *assert* that we're slow, because we might be
		 * slow because of an asynchronous modification to global state,
		 * which might be asynchronously modified *back*.
		 */
	} else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
		tsd_slow_update(tsd);
	} else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
		if (!minimal) {
			if (tsd_booted) {
				tsd_state_set(tsd, tsd_state_nominal);
				tsd_slow_update(tsd);
				/* Trigger cleanup handler registration. */
				tsd_set(tsd);
				tsd_data_init(tsd);
			}
		} else {
			tsd_state_set(tsd, tsd_state_minimal_initialized);
			tsd_set(tsd);
			tsd_data_init_nocleanup(tsd);
			*tsd_min_init_state_nfetchedp_get(tsd) = 1;
		}
	} else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
		/*
		 * If a thread only ever deallocates (e.g. dedicated reclamation
		 * threads), we want to help it to eventually escape the slow
		 * path (caused by the minimal initialized state).  The nfetched
		 * counter tracks the number of times the tsd has been accessed
		 * under the min init state, and triggers the switch to nominal
		 * once reached the max allowed count.
		 *
		 * This means at most 128 deallocations stay on the slow path.
		 *
		 * Also see comments in free_default().
		 */
		uint8_t *nfetched = tsd_min_init_state_nfetchedp_get(tsd);
		assert(*nfetched >= 1);
		(*nfetched)++;
		if (!minimal || *nfetched == TSD_MIN_INIT_STATE_MAX_FETCHED) {
			/* Switch to fully initialized. */
			tsd_state_set(tsd, tsd_state_nominal);
			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
			(*tsd_reentrancy_levelp_get(tsd))--;
			tsd_slow_update(tsd);
			tsd_data_init(tsd);
		} else {
			assert_tsd_data_cleanup_done(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_purgatory) {
		tsd_state_set(tsd, tsd_state_reincarnated);
		tsd_set(tsd);
		tsd_data_init_nocleanup(tsd);
	} else {
		assert(tsd_state_get(tsd) == tsd_state_reincarnated);
	}

	return tsd;
}

void *
malloc_tsd_malloc(size_t size) {
	return a0malloc(CACHELINE_CEILING(size));
}

void
malloc_tsd_dalloc(void *wrapper) {
	a0dalloc(wrapper);
}

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];

#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_thread_cleanup(void) {
	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
	unsigned i;

	for (i = 0; i < ncleanups; i++) {
		pending[i] = true;
	}

	do {
		again = false;
		for (i = 0; i < ncleanups; i++) {
			if (pending[i]) {
				pending[i] = cleanups[i]();
				if (pending[i]) {
					again = true;
				}
			}
		}
	} while (again);
}

#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_tsd_cleanup_register(bool (*f)(void)) {
	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
	cleanups[ncleanups] = f;
	ncleanups++;
}

#endif

static void
tsd_do_data_cleanup(tsd_t *tsd) {
	prof_tdata_cleanup(tsd);
	iarena_cleanup(tsd);
	arena_cleanup(tsd);
	tcache_cleanup(tsd);
	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
	*tsd_reentrancy_levelp_get(tsd) = 1;
}

void
tsd_cleanup(void *arg) {
	tsd_t *tsd = (tsd_t *)arg;

	switch (tsd_state_get(tsd)) {
	case tsd_state_uninitialized:
		/* Do nothing. */
		break;
	case tsd_state_minimal_initialized:
		/* This implies the thread only did free() in its life time. */
		/* Fall through. */
	case tsd_state_reincarnated:
		/*
		 * Reincarnated means another destructor deallocated memory
		 * after the destructor was called.  Cleanup isn't required but
		 * is still called for testing and completeness.
		 */
		assert_tsd_data_cleanup_done(tsd);
		JEMALLOC_FALLTHROUGH;
	case tsd_state_nominal:
	case tsd_state_nominal_slow:
		tsd_do_data_cleanup(tsd);
		tsd_state_set(tsd, tsd_state_purgatory);
		tsd_set(tsd);
		break;
	case tsd_state_purgatory:
		/*
		 * The previous time this destructor was called, we set the
		 * state to tsd_state_purgatory so that other destructors
		 * wouldn't cause re-creation of the tsd.  This time, do
		 * nothing, and do not request another callback.
		 */
		break;
	default:
		not_reached();
	}
#ifdef JEMALLOC_JET
	test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
	int *data = tsd_test_datap_get_unsafe(tsd);
	if (test_callback != NULL) {
		test_callback(data);
	}
#endif
}

tsd_t *
malloc_tsd_boot0(void) {
	tsd_t *tsd;

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
	ncleanups = 0;
#endif
	if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock",
	    WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
		return NULL;
	}
	if (tsd_boot0()) {
		return NULL;
	}
	tsd = tsd_fetch();
	return tsd;
}

void
malloc_tsd_boot1(void) {
	tsd_boot1();
	tsd_t *tsd = tsd_fetch();
	/* malloc_slow has been set properly.  Update tsd_slow. */
	tsd_slow_update(tsd);
}

#ifdef _WIN32
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
	switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
	case DLL_THREAD_ATTACH:
		isthreaded = true;
		break;
#endif
	case DLL_THREAD_DETACH:
		_malloc_thread_cleanup();
		break;
	default:
		break;
	}
	return true;
}

/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read". We won't read for the rest of the file, so we can get away
 * with unhooking.
 */
#ifdef read
#  undef read
#endif

#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) )
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL	(WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif

#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
    !defined(_WIN32))
void *
tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
	pthread_t self = pthread_self();
	tsd_init_block_t *iter;

	/* Check whether this thread has already inserted into the list. */
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_foreach(iter, &head->blocks, link) {
		if (iter->thread == self) {
			malloc_mutex_unlock(TSDN_NULL, &head->lock);
			return iter->data;
		}
	}
	/* Insert block into list. */
	ql_elm_new(block, link);
	block->thread = self;
	ql_tail_insert(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
	return NULL;
}

void
tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_remove(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
#endif

void
tsd_prefork(tsd_t *tsd) {
	malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_parent(tsd_t *tsd) {
	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_child(tsd_t *tsd) {
	malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_new(&tsd_nominal_tsds);

	if (tsd_state_get(tsd) <= tsd_state_nominal_max) {
		tsd_add_nominal(tsd);
	}
}