Enable fast thread locals for dealloc-only threads.

Previously, if a thread did only deallocations, it stayed on the slow path /
minimal initialized state forever.  However, dealloc-only is a valid pattern for
dedicated reclamation threads -- this means the thread cache is disabled (no
batched flush) for them, which causes high overhead and contention.

Added a condition to fully initialize TSD once a fair amount of dealloc
activity is observed.
This commit is contained in:
Qi Wang
2022-10-21 15:10:48 -07:00
committed by Qi Wang
parent be65438f20
commit 143e9c4a2f
3 changed files with 77 additions and 1 deletions

View File

@@ -59,6 +59,7 @@ typedef ql_elm(tsd_t) tsd_link_t;
#define TSD_DATA_SLOW \
O(tcache_enabled, bool, bool) \
O(reentrancy_level, int8_t, int8_t) \
O(min_init_state_nfetched, uint8_t, uint8_t) \
O(thread_allocated_last_event, uint64_t, uint64_t) \
O(thread_allocated_next_event, uint64_t, uint64_t) \
O(thread_deallocated_last_event, uint64_t, uint64_t) \
@@ -91,6 +92,7 @@ typedef ql_elm(tsd_t) tsd_link_t;
#define TSD_DATA_SLOW_INITIALIZER \
/* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \
/* reentrancy_level */ 0, \
/* min_init_state_nfetched */ 0, \
/* thread_allocated_last_event */ 0, \
/* thread_allocated_next_event */ 0, \
/* thread_deallocated_last_event */ 0, \
@@ -177,6 +179,8 @@ void tsd_global_slow_inc(tsdn_t *tsdn);
void tsd_global_slow_dec(tsdn_t *tsdn);
bool tsd_global_slow();
/*
 * NOTE(review): presumably the number of TSD fetches tolerated while in the
 * minimal initialized state before the TSD gets fully initialized; the
 * counter looks to be min_init_state_nfetched (declared uint8_t, so 128
 * fits) -- confirm against the code that uses this constant.
 */
#define TSD_MIN_INIT_STATE_MAX_FETCHED (128)
enum {
/* Common case --> jnz. */
tsd_state_nominal = 0,