From d1313313101f9df127bba08bf8fd90a849bf3b87 Mon Sep 17 00:00:00 2001
From: Qi Wang
Date: Fri, 23 Jun 2023 14:13:26 -0700
Subject: [PATCH] Avoid eager purging on the dedicated oversize arena when using bg thds.

We have observed new workload patterns (namely ML training type) that cycle
through oversized allocations frequently, because 1) the dataset might be sparse
which is faster to go through, and 2) GPU accelerated. As a result, the eager
purging from the oversize arena becomes a bottleneck. To offer an easy solution,
allow normal purging of the oversized extents when background threads are
enabled.
---
 src/arena.c                    | 61 +++++++++++++++++++++-------------
 src/extent.c                   |  1 +
 test/unit/oversize_threshold.c |  5 ++-
 3 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/src/arena.c b/src/arena.c
index ab1a9ab8..3b151b77 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -1730,6 +1730,42 @@ label_error:
 	return NULL;
 }
 
+static arena_t *
+arena_create_huge_arena(tsd_t *tsd, unsigned ind) {
+	assert(ind != 0);
+
+	arena_t *huge_arena = arena_get(tsd_tsdn(tsd), ind, true);
+	if (huge_arena == NULL) {
+		return NULL;
+	}
+
+	char *huge_arena_name = "auto_oversize";
+	strncpy(huge_arena->name, huge_arena_name, ARENA_NAME_LEN);
+	huge_arena->name[ARENA_NAME_LEN - 1] = '\0';
+
+	/*
+	 * Purge eagerly for huge allocations, because: 1) number of huge
+	 * allocations is usually small, which means ticker based decay is not
+	 * reliable; and 2) less immediate reuse is expected for huge
+	 * allocations.
+	 *
+	 * However, with background threads enabled, keep normal purging since
+	 * the purging delay is bounded.
+	 */
+	if (!background_thread_enabled()
+	    && arena_dirty_decay_ms_default_get() > 0) {
+		arena_decay_ms_set(tsd_tsdn(tsd), huge_arena,
+		    extent_state_dirty, 0);
+	}
+	if (!background_thread_enabled()
+	    &&arena_muzzy_decay_ms_default_get() > 0) {
+		arena_decay_ms_set(tsd_tsdn(tsd), huge_arena,
+		    extent_state_muzzy, 0);
+	}
+
+	return huge_arena;
+}
+
 arena_t *
 arena_choose_huge(tsd_t *tsd) {
 	/* huge_arena_ind can be 0 during init (will use a0). */
@@ -1740,30 +1776,7 @@ arena_choose_huge(tsd_t *tsd) {
 	arena_t *huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, false);
 	if (huge_arena == NULL) {
 		/* Create the huge arena on demand. */
-		assert(huge_arena_ind != 0);
-		huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, true);
-		if (huge_arena == NULL) {
-			return NULL;
-		}
-
-		char *huge_arena_name = "auto_oversize";
-		strncpy(huge_arena->name, huge_arena_name, ARENA_NAME_LEN);
-		huge_arena->name[ARENA_NAME_LEN - 1] = '\0';
-
-		/*
-		 * Purge eagerly for huge allocations, because: 1) number of
-		 * huge allocations is usually small, which means ticker based
-		 * decay is not reliable; and 2) less immediate reuse is
-		 * expected for huge allocations.
-		 */
-		if (arena_dirty_decay_ms_default_get() > 0) {
-			arena_decay_ms_set(tsd_tsdn(tsd), huge_arena,
-			    extent_state_dirty, 0);
-		}
-		if (arena_muzzy_decay_ms_default_get() > 0) {
-			arena_decay_ms_set(tsd_tsdn(tsd), huge_arena,
-			    extent_state_muzzy, 0);
-		}
+		huge_arena = arena_create_huge_arena(tsd, huge_arena_ind);
 	}
 
 	return huge_arena;
diff --git a/src/extent.c b/src/extent.c
index 18e4698c..477050b6 100644
--- a/src/extent.c
+++ b/src/extent.c
@@ -944,6 +944,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
 	} while (coalesced);
 	if (edata_size_get(edata) >=
 	    atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED)
+	    && !background_thread_enabled()
 	    && extent_may_force_decay(pac)) {
 		/* Shortcut to purge the oversize extent eagerly. */
 		malloc_mutex_unlock(tsdn, &ecache->mtx);
diff --git a/test/unit/oversize_threshold.c b/test/unit/oversize_threshold.c
index 44a8f76a..95ce6537 100644
--- a/test/unit/oversize_threshold.c
+++ b/test/unit/oversize_threshold.c
@@ -120,7 +120,10 @@ TEST_BEGIN(test_oversize_threshold) {
 	 */
 	ptr = mallocx(2 * 1024 * 1024, MALLOCX_ARENA(arena));
 	dallocx(ptr, MALLOCX_TCACHE_NONE);
-	expect_zu_ge(max_purged, 2 * 1024 * 1024, "Expected a 2MB purge");
+	if (!is_background_thread_enabled()) {
+		expect_zu_ge(max_purged, 2 * 1024 * 1024,
+		    "Expected a 2MB purge");
+	}
 }
 TEST_END
 