Avoid eager purging on the dedicated oversize arena when using bg thds.

We have observed new workload patterns (namely ML training) that cycle through
oversized allocations frequently, because 1) the dataset may be sparse, which
makes each pass through it fast, and 2) the workload is GPU-accelerated.  As a
result, eager purging from the oversize arena becomes a bottleneck.  To offer
an easy solution, allow normal decay-based purging of oversized extents when
background threads are enabled.
Qi Wang, 2023-06-23 14:13:26 -07:00 (committed by Qi Wang)
parent 46e464a26b
commit d131331310
3 changed files with 42 additions and 25 deletions
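For readers who want to reproduce the before/after behavior: background
threads are off by default, and can be enabled at startup (e.g.
MALLOC_CONF=background_thread:true) or at runtime via mallctl.  A minimal
sketch of the runtime route, assuming an unprefixed jemalloc build:

#include <stdbool.h>
#include <jemalloc/jemalloc.h>

/*
 * Turn on jemalloc's background threads at runtime.  With this commit,
 * the dedicated oversize arena then keeps normal decay-based purging
 * instead of purging eagerly on every deallocation.
 */
static int
enable_background_threads(void) {
	bool enable = true;
	return mallctl("background_thread", NULL, NULL, &enable,
	    sizeof(enable));
}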

src/arena.c

@@ -1730,6 +1730,42 @@ label_error:
 	return NULL;
 }
 
+static arena_t *
+arena_create_huge_arena(tsd_t *tsd, unsigned ind) {
+	assert(ind != 0);
+
+	arena_t *huge_arena = arena_get(tsd_tsdn(tsd), ind, true);
+	if (huge_arena == NULL) {
+		return NULL;
+	}
+
+	char *huge_arena_name = "auto_oversize";
+	strncpy(huge_arena->name, huge_arena_name, ARENA_NAME_LEN);
+	huge_arena->name[ARENA_NAME_LEN - 1] = '\0';
+
+	/*
+	 * Purge eagerly for huge allocations, because: 1) number of huge
+	 * allocations is usually small, which means ticker based decay is not
+	 * reliable; and 2) less immediate reuse is expected for huge
+	 * allocations.
+	 *
+	 * However, with background threads enabled, keep normal purging since
+	 * the purging delay is bounded.
+	 */
+	if (!background_thread_enabled()
+	    && arena_dirty_decay_ms_default_get() > 0) {
+		arena_decay_ms_set(tsd_tsdn(tsd), huge_arena,
+		    extent_state_dirty, 0);
+	}
+	if (!background_thread_enabled()
+	    && arena_muzzy_decay_ms_default_get() > 0) {
+		arena_decay_ms_set(tsd_tsdn(tsd), huge_arena,
+		    extent_state_muzzy, 0);
+	}
+
+	return huge_arena;
+}
+
 arena_t *
 arena_choose_huge(tsd_t *tsd) {
 	/* huge_arena_ind can be 0 during init (will use a0). */
@@ -1740,30 +1776,7 @@ arena_choose_huge(tsd_t *tsd) {
 	arena_t *huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, false);
 	if (huge_arena == NULL) {
 		/* Create the huge arena on demand. */
-		assert(huge_arena_ind != 0);
-		huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, true);
-		if (huge_arena == NULL) {
-			return NULL;
-		}
-
-		char *huge_arena_name = "auto_oversize";
-		strncpy(huge_arena->name, huge_arena_name, ARENA_NAME_LEN);
-		huge_arena->name[ARENA_NAME_LEN - 1] = '\0';
-
-		/*
-		 * Purge eagerly for huge allocations, because: 1) number of
-		 * huge allocations is usually small, which means ticker based
-		 * decay is not reliable; and 2) less immediate reuse is
-		 * expected for huge allocations.
-		 */
-		if (arena_dirty_decay_ms_default_get() > 0) {
-			arena_decay_ms_set(tsd_tsdn(tsd), huge_arena,
-			    extent_state_dirty, 0);
-		}
-		if (arena_muzzy_decay_ms_default_get() > 0) {
-			arena_decay_ms_set(tsd_tsdn(tsd), huge_arena,
-			    extent_state_muzzy, 0);
-		}
+		huge_arena = arena_create_huge_arena(tsd, huge_arena_ind);
 	}
 
 	return huge_arena;
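For context, "eager" purging here simply means a decay time of 0 ms:
arena_create_huge_arena() forces the huge arena's dirty/muzzy decay to zero
unless background threads are running.  Application code can apply the same
per-arena setting through the public arena.<i>.dirty_decay_ms mallctl; a
hedged sketch, where ind is assumed to be a valid arena index:

#include <stdio.h>
#include <sys/types.h>
#include <jemalloc/jemalloc.h>

/*
 * Set arena `ind`'s dirty decay time to 0 ms so that dirty pages are
 * purged immediately, mirroring what arena_create_huge_arena() does
 * internally for the oversize arena.
 */
static int
set_eager_dirty_purge(unsigned ind) {
	char name[64];
	ssize_t decay_ms = 0;
	snprintf(name, sizeof(name), "arena.%u.dirty_decay_ms", ind);
	return mallctl(name, NULL, NULL, &decay_ms, sizeof(decay_ms));
}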

src/extent.c

@@ -944,6 +944,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
 	} while (coalesced);
 	if (edata_size_get(edata) >=
 	    atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED)
+	    && !background_thread_enabled()
 	    && extent_may_force_decay(pac)) {
 		/* Shortcut to purge the oversize extent eagerly. */
 		malloc_mutex_unlock(tsdn, &ecache->mtx);
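For context, pac->oversize_threshold mirrors the runtime option
opt.oversize_threshold (default 8 MiB), the size at or above which
allocations are routed to the dedicated oversize arena; it is settable at
startup, e.g. MALLOC_CONF=oversize_threshold:16777216.  A small sketch
reading the active value back:

#include <stdio.h>
#include <jemalloc/jemalloc.h>

int
main(void) {
	/* Allocations at or above this size go to the oversize arena. */
	size_t threshold;
	size_t len = sizeof(threshold);
	if (mallctl("opt.oversize_threshold", &threshold, &len, NULL, 0)
	    == 0) {
		printf("oversize_threshold: %zu bytes\n", threshold);
	}
	return 0;
}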

test/unit/oversize_threshold.c

@@ -120,7 +120,10 @@ TEST_BEGIN(test_oversize_threshold) {
 	 */
 	ptr = mallocx(2 * 1024 * 1024, MALLOCX_ARENA(arena));
 	dallocx(ptr, MALLOCX_TCACHE_NONE);
-	expect_zu_ge(max_purged, 2 * 1024 * 1024, "Expected a 2MB purge");
+	if (!is_background_thread_enabled()) {
+		expect_zu_ge(max_purged, 2 * 1024 * 1024,
+		    "Expected a 2MB purge");
+	}
 }
 TEST_END
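Usage note: the test now asserts the eager 2MB purge only when background
threads are off.  To exercise the other branch, one can run the unit test
with background threads enabled, e.g.
MALLOC_CONF=background_thread:true ./test/unit/oversize_threshold
(assuming the standard jemalloc test build layout).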