#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpa.h" #include "jemalloc/internal/flat_bitmap.h" #include "jemalloc/internal/witness.h" static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero); static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size); static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache) { bool err; /* * We fundamentally rely on a address-space-hungry growth strategy for * hugepages. This may change in the future, but for now we should have * refused to turn on any HPA at a higher level of the stack. */ assert(LG_SIZEOF_PTR == 3); err = malloc_mutex_init(&hpa->grow_mtx, "hpa_grow", WITNESS_RANK_HPA_GROW, malloc_mutex_rank_exclusive); if (err) { return true; } err = malloc_mutex_init(&hpa->mtx, "hpa", WITNESS_RANK_HPA, malloc_mutex_rank_exclusive); if (err) { return true; } hpa_central_init(&hpa->central, edata_cache, emap); if (err) { return true; } hpa->ind = base_ind_get(base); hpa->edata_cache = edata_cache; exp_grow_init(&hpa->exp_grow); return false; } bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, size_t ps_alloc_max, size_t small_max, size_t large_min) { bool err; err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow", WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive); if (err) { return true; } err = malloc_mutex_init(&shard->mtx, "hpa_shard", WITNESS_RANK_HPA_SHARD, malloc_mutex_rank_exclusive); if (err) { return true; } assert(edata_cache != NULL); edata_cache_small_init(&shard->ecs, edata_cache); shard->hpa = hpa; psset_init(&shard->psset); shard->ps_goal = ps_goal; shard->ps_alloc_max = ps_alloc_max; shard->small_max = small_max; shard->large_min = large_min; /* * Fill these in last, so that if an hpa_shard gets used despite * initialization failing, we'll at least crash instead of just * operating on corrupted data. */ shard->pai.alloc = &hpa_alloc; shard->pai.expand = &hpa_expand; shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; shard->ind = ind; assert(ind == base_ind_get(edata_cache->base)); return false; } static edata_t * hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, size_t size_goal) { bool err; edata_t *edata; hpa_t *hpa = shard->hpa; malloc_mutex_lock(tsdn, &hpa->mtx); edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, size_goal); malloc_mutex_unlock(tsdn, &hpa->mtx); if (edata != NULL) { edata_arena_ind_set(edata, shard->ind); return edata; } /* No existing range can satisfy the request; try to grow. */ malloc_mutex_lock(tsdn, &hpa->grow_mtx); /* * We could have raced with other grow attempts; re-check to see if we * did, and are now able to satisfy the request. */ malloc_mutex_lock(tsdn, &hpa->mtx); edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, size_goal); malloc_mutex_unlock(tsdn, &hpa->mtx); if (edata != NULL) { malloc_mutex_unlock(tsdn, &hpa->grow_mtx); edata_arena_ind_set(edata, shard->ind); return edata; } /* * No such luck. We've dropped mtx, so other allocations can proceed * while we allocate the new extent. We know no one else will grow in * the meantime, though, since we still hold grow_mtx. 
	 */
	size_t alloc_size;
	pszind_t skip;

	size_t hugepage_goal_min = HUGEPAGE_CEILING(size_goal);

	err = exp_grow_size_prepare(&hpa->exp_grow, hugepage_goal_min,
	    &alloc_size, &skip);
	if (err) {
		malloc_mutex_unlock(tsdn, &hpa->grow_mtx);
		return NULL;
	}
	alloc_size = HUGEPAGE_CEILING(alloc_size);

	/*
	 * Eventually, we need to think about this more systematically, and
	 * in terms of extent hooks.  For now, though, we know we only care
	 * about overcommitting systems, and we're not going to purge much.
	 */
	bool commit = true;
	void *addr = pages_map(NULL, alloc_size, HUGEPAGE, &commit);
	if (addr == NULL) {
		malloc_mutex_unlock(tsdn, &hpa->grow_mtx);
		return NULL;
	}
	err = pages_huge(addr, alloc_size);
	/*
	 * Ignore this for now; even if the hugepage promotion fails, the
	 * address space should still be usable.
	 */
	(void)err;

	edata = edata_cache_get(tsdn, hpa->edata_cache);
	if (edata == NULL) {
		malloc_mutex_unlock(tsdn, &hpa->grow_mtx);
		pages_unmap(addr, alloc_size);
		return NULL;
	}

	/*
	 * The serial number here is just a placeholder; the hpa_central gets
	 * to decide how it wants to fill it in.
	 *
	 * The grow edata is associated with the hpa_central_t arena ind; the
	 * subsequent allocation we get (in the hpa_central_alloc_grow call
	 * below) will be filled in with the shard ind.
	 */
	edata_init(edata, hpa->ind, addr, alloc_size, /* slab */ false,
	    SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ true,
	    /* committed */ true, EXTENT_PAI_HPA, /* is_head */ true);

	malloc_mutex_lock(tsdn, &hpa->mtx);
	/* Note that this replaces edata with the allocation to return. */
	err = hpa_central_alloc_grow(tsdn, &hpa->central, size_goal, edata);
	malloc_mutex_unlock(tsdn, &hpa->mtx);
	if (!err) {
		exp_grow_size_commit(&hpa->exp_grow, skip);
	}
	malloc_mutex_unlock(tsdn, &hpa->grow_mtx);
	edata_arena_ind_set(edata, shard->ind);

	if (err) {
		pages_unmap(addr, alloc_size);
		edata_cache_put(tsdn, hpa->edata_cache, edata);
		return NULL;
	}

	return edata;
}

/*
 * Allocate size bytes from a pageslab in the shard's psset, growing the
 * psset by a fresh ps_goal-sized pageslab from the central allocator if no
 * existing slab can satisfy the request.
 */
static edata_t *
hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) {
	assert(size <= shard->ps_alloc_max);

	bool err;
	malloc_mutex_lock(tsdn, &shard->mtx);
	edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs);
	if (edata == NULL) {
		malloc_mutex_unlock(tsdn, &shard->mtx);
		return NULL;
	}
	edata_arena_ind_set(edata, shard->ind);

	err = psset_alloc_reuse(&shard->psset, edata, size);
	malloc_mutex_unlock(tsdn, &shard->mtx);
	if (!err) {
		return edata;
	}
	/* Nothing in the psset works; we have to grow it. */
	malloc_mutex_lock(tsdn, &shard->grow_mtx);

	/* As above; check for grow races. */
	malloc_mutex_lock(tsdn, &shard->mtx);
	err = psset_alloc_reuse(&shard->psset, edata, size);
	malloc_mutex_unlock(tsdn, &shard->mtx);
	if (!err) {
		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
		return edata;
	}

	edata_t *grow_edata = hpa_alloc_central(tsdn, shard, size,
	    shard->ps_goal);
	if (grow_edata == NULL) {
		malloc_mutex_unlock(tsdn, &shard->grow_mtx);

		malloc_mutex_lock(tsdn, &shard->mtx);
		edata_cache_small_put(tsdn, &shard->ecs, edata);
		malloc_mutex_unlock(tsdn, &shard->mtx);

		return NULL;
	}
	edata_arena_ind_set(grow_edata, shard->ind);
	edata_slab_set(grow_edata, true);
	fb_group_t *fb = edata_slab_data_get(grow_edata)->bitmap;
	fb_init(fb, shard->ps_goal / PAGE);

	/*
	 * We got the new edata; allocate from it.
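	 *
	 * psset_alloc_new installs grow_edata in the psset as a fresh
	 * pageslab and carves the requested run out of it into edata,
	 * unlike the psset_alloc_reuse calls above, which search only the
	 * pageslabs already present.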
	 */
	malloc_mutex_lock(tsdn, &shard->mtx);
	psset_alloc_new(&shard->psset, grow_edata, edata, size);
	malloc_mutex_unlock(tsdn, &shard->mtx);

	malloc_mutex_unlock(tsdn, &shard->grow_mtx);
	return edata;
}

static hpa_shard_t *
hpa_from_pai(pai_t *self) {
	assert(self->alloc == &hpa_alloc);
	assert(self->expand == &hpa_expand);
	assert(self->shrink == &hpa_shrink);
	assert(self->dalloc == &hpa_dalloc);
	return (hpa_shard_t *)self;
}

static edata_t *
hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,
    bool zero) {
	assert((size & PAGE_MASK) == 0);
	hpa_shard_t *shard = hpa_from_pai(self);
	/* We don't handle alignment or zeroing for now. */
	if (alignment > PAGE || zero) {
		return NULL;
	}
	if (size > shard->small_max && size < shard->large_min) {
		return NULL;
	}

	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);

	edata_t *edata;
	if (size <= shard->ps_alloc_max) {
		edata = hpa_alloc_psset(tsdn, shard, size);
		if (edata != NULL) {
			emap_register_boundary(tsdn, shard->hpa->central.emap,
			    edata, SC_NSIZES, /* slab */ false);
		}
	} else {
		edata = hpa_alloc_central(tsdn, shard, size, size);
	}

	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
	    WITNESS_RANK_CORE, 0);
	if (edata != NULL) {
		emap_assert_mapped(tsdn, shard->hpa->central.emap, edata);
		assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
		assert(edata_state_get(edata) == extent_state_active);
		assert(edata_arena_ind_get(edata) == shard->ind);
		assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES);
		assert(!edata_slab_get(edata));
		assert(edata_committed_get(edata));
		assert(edata_base_get(edata) == edata_addr_get(edata));
		assert(edata_base_get(edata) != NULL);
	}
	return edata;
}

static bool
hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
    size_t new_size, bool zero) {
	/* Expand not yet supported. */
	return true;
}

static bool
hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
    size_t new_size) {
	/* Shrink not yet supported. */
	return true;
}

static void
hpa_dalloc_central(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {
	hpa_t *hpa = shard->hpa;

	edata_arena_ind_set(edata, hpa->ind);
	malloc_mutex_lock(tsdn, &hpa->mtx);
	hpa_central_dalloc(tsdn, &hpa->central, edata);
	malloc_mutex_unlock(tsdn, &hpa->mtx);
}

static void
hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) {
	hpa_shard_t *shard = hpa_from_pai(self);

	edata_addr_set(edata, edata_base_get(edata));
	edata_zeroed_set(edata, false);

	assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
	assert(edata_state_get(edata) == extent_state_active);
	assert(edata_arena_ind_get(edata) == shard->ind);
	assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES);
	assert(!edata_slab_get(edata));
	assert(edata_committed_get(edata));
	assert(edata_base_get(edata) != NULL);

	/*
	 * There are two cases:
	 * - The psset field is NULL.  In this case, the edata comes directly
	 *   from the hpa_central_t and should be returned to it.
	 * - The psset field is not NULL, in which case we return the edata
	 *   to the appropriate slab (which may in turn cause it to become
	 *   empty, triggering an eviction of the whole slab, which should
	 *   then be returned to the hpa_central_t).
	 */
	if (edata_ps_get(edata) != NULL) {
		emap_deregister_boundary(tsdn, shard->hpa->central.emap,
		    edata);

		malloc_mutex_lock(tsdn, &shard->mtx);
		edata_t *evicted_ps = psset_dalloc(&shard->psset, edata);
		edata_cache_small_put(tsdn, &shard->ecs, edata);
		malloc_mutex_unlock(tsdn, &shard->mtx);

		if (evicted_ps != NULL) {
			/*
			 * The deallocation caused a pageslab to become empty.
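			 * (psset_dalloc signals this by returning the
			 * now-empty pageslab; otherwise it returns NULL.)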
			 * Free it back to the centralized allocator.
			 */
			bool err = emap_register_boundary(tsdn,
			    shard->hpa->central.emap, evicted_ps, SC_NSIZES,
			    /* slab */ false);
			/*
			 * Registration can only fail on OOM, but the boundary
			 * mappings should have been initialized during
			 * allocation.
			 */
			assert(!err);
			edata_slab_set(evicted_ps, false);
			edata_ps_set(evicted_ps, NULL);

			assert(edata_arena_ind_get(evicted_ps) == shard->ind);
			hpa_dalloc_central(tsdn, shard, evicted_ps);
		}
	} else {
		hpa_dalloc_central(tsdn, shard, edata);
	}
}

static void
hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) {
	assert(bin_stats->npageslabs == 0);
	assert(bin_stats->nactive == 0);
	assert(bin_stats->ninactive == 0);
}

void
hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) {
	malloc_mutex_lock(tsdn, &shard->mtx);
	edata_cache_small_disable(tsdn, &shard->ecs);
	malloc_mutex_unlock(tsdn, &shard->mtx);
}

void
hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) {
	/*
	 * By the time we're here, the arena code should have dalloc'd all
	 * the active extents, which means we should have eventually evicted
	 * everything from the psset, so it shouldn't be able to serve even
	 * a 1-page allocation.
	 */
	if (config_debug) {
		edata_t edata = {0};
		malloc_mutex_lock(tsdn, &shard->mtx);
		bool psset_empty = psset_alloc_reuse(&shard->psset, &edata,
		    PAGE);
		malloc_mutex_unlock(tsdn, &shard->mtx);
		assert(psset_empty);
		hpa_shard_assert_stats_empty(&shard->psset.full_slab_stats);
		for (pszind_t i = 0; i < PSSET_NPSIZES; i++) {
			hpa_shard_assert_stats_empty(
			    &shard->psset.slab_stats[i]);
		}
	}
}

void
hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) {
	malloc_mutex_prefork(tsdn, &shard->grow_mtx);
}

void
hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard) {
	malloc_mutex_prefork(tsdn, &shard->mtx);
}

void
hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard) {
	malloc_mutex_postfork_parent(tsdn, &shard->grow_mtx);
	malloc_mutex_postfork_parent(tsdn, &shard->mtx);
}

void
hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) {
	malloc_mutex_postfork_child(tsdn, &shard->grow_mtx);
	malloc_mutex_postfork_child(tsdn, &shard->mtx);
}

void
hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa) {
	malloc_mutex_prefork(tsdn, &hpa->grow_mtx);
	malloc_mutex_prefork(tsdn, &hpa->mtx);
}

void
hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa) {
	malloc_mutex_postfork_parent(tsdn, &hpa->grow_mtx);
	malloc_mutex_postfork_parent(tsdn, &hpa->mtx);
}

void
hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa) {
	malloc_mutex_postfork_child(tsdn, &hpa->grow_mtx);
	malloc_mutex_postfork_child(tsdn, &hpa->mtx);
}