Introduce hpdata_t.

Using an edata_t both for hugepages and the allocations within those hugepages
was convenient at first, but has outlived its usefulness.  Representing
hugepages explicitly, with their own data structure, will make future
development easier.
This commit is contained in:
David Goldblatt
2020-11-17 16:32:45 -08:00
committed by David Goldblatt
parent 4a15008cfb
commit ca30b5db2b
17 changed files with 414 additions and 405 deletions

View File

@@ -4,6 +4,7 @@
#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/bin_info.h"
#include "jemalloc/internal/bit_util.h"
#include "jemalloc/internal/hpdata.h"
#include "jemalloc/internal/nstime.h"
#include "jemalloc/internal/ph.h"
#include "jemalloc/internal/ql.h"
@@ -71,7 +72,6 @@ struct edata_map_info_s {
typedef struct edata_s edata_t;
typedef ph(edata_t) edata_avail_t;
typedef ph(edata_t) edata_heap_t;
typedef ph(edata_t) edata_age_heap_t;
struct edata_s {
/*
* Bitfield containing several fields:
@@ -194,41 +194,13 @@ struct edata_s {
};
/*
* In some context-specific sense, the age of an active extent. Each
* context can pick a specific meaning, and share the definition of the
* edata_age_heap_t below.
* If this edata is a user allocation from an HPA, it comes out of some
pageslab (we don't yet support hugepage allocations that don't fit
* into pageslabs). This tracks it.
*/
uint64_t age;
union {
/*
* We could steal a low bit from these fields to indicate what
* sort of "thing" this is (a page slab, an object within a page
* slab, or a non-pageslab range). We don't do this yet, but it
* would enable some extra asserts.
*/
/*
* If this edata is a user allocation from an HPA, it comes out
of some pageslab (we don't yet support hugepage allocations
* that don't fit into pageslabs). This tracks it.
*/
edata_t *ps;
/*
* If this edata *is* a pageslab, then we cache some useful
* information about its associated bitmap.
*/
struct {
/*
* The longest free range a pageslab contains determines
* the heap it lives in. If we know that it didn't
* change after an operation, we can avoid moving it
* between heaps.
*/
uint32_t longest_free_range;
/* Whether or not the slab is backed by a hugepage. */
bool hugeified;
};
};
hpdata_t *e_ps;
/* Extra field reserved for HPA. */
void *e_reserved;
union {
/*
@@ -330,11 +302,6 @@ edata_pai_get(const edata_t *edata) {
EDATA_BITS_PAI_SHIFT);
}
static inline bool
edata_hugeified_get(const edata_t *edata) {
return edata->hugeified;
}
static inline bool
edata_slab_get(const edata_t *edata) {
return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >>
@@ -377,21 +344,10 @@ edata_bsize_get(const edata_t *edata) {
return edata->e_bsize;
}
static inline uint64_t
edata_age_get(const edata_t *edata) {
return edata->age;
}
static inline edata_t *
static inline hpdata_t *
edata_ps_get(const edata_t *edata) {
assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
return edata->ps;
}
static inline uint32_t
edata_longest_free_range_get(const edata_t *edata) {
assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
return edata->longest_free_range;
return edata->e_ps;
}
static inline void *
@@ -477,21 +433,9 @@ edata_bsize_set(edata_t *edata, size_t bsize) {
}
static inline void
edata_age_set(edata_t *edata, uint64_t age) {
edata->age = age;
}
static inline void
edata_ps_set(edata_t *edata, edata_t *ps) {
assert(edata_pai_get(edata) == EXTENT_PAI_HPA || ps == NULL);
edata->ps = ps;
}
static inline void
edata_longest_free_range_set(edata_t *edata, uint32_t longest_free_range) {
assert(edata_pai_get(edata) == EXTENT_PAI_HPA
|| longest_free_range == 0);
edata->longest_free_range = longest_free_range;
edata_ps_set(edata_t *edata, hpdata_t *ps) {
assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
edata->e_ps = ps;
}
static inline void
@@ -566,11 +510,6 @@ edata_pai_set(edata_t *edata, extent_pai_t pai) {
((uint64_t)pai << EDATA_BITS_PAI_SHIFT);
}
static inline void
edata_hugeified_set(edata_t *edata, bool hugeified) {
edata->hugeified = hugeified;
}
static inline void
edata_slab_set(edata_t *edata, bool slab) {
edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) |
@@ -633,9 +572,6 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size,
if (config_prof) {
edata_prof_tctx_set(edata, NULL);
}
edata_age_set(edata, 0);
edata_ps_set(edata, NULL);
edata_longest_free_range_set(edata, 0);
}
static inline void
@@ -649,15 +585,12 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) {
edata_state_set(edata, extent_state_active);
edata_zeroed_set(edata, true);
edata_committed_set(edata, true);
edata_age_set(edata, 0);
/*
* This isn't strictly true, but base allocated extents never get
* deallocated and can't be looked up in the emap, but no sense in
* wasting a state bit to encode this fact.
*/
edata_pai_set(edata, EXTENT_PAI_PAC);
edata_ps_set(edata, NULL);
edata_longest_free_range_set(edata, 0);
}
static inline int
@@ -718,25 +651,7 @@ edata_esnead_comp(const edata_t *a, const edata_t *b) {
return ret;
}
static inline int
edata_age_comp(const edata_t *a, const edata_t *b) {
uint64_t a_age = edata_age_get(a);
uint64_t b_age = edata_age_get(b);
/*
* Equal ages are possible in certain race conditions, like two distinct
* threads simultaneously allocating a new fresh slab without holding a
* bin lock.
*/
int ret = (a_age > b_age) - (a_age < b_age);
if (ret != 0) {
return ret;
}
return edata_snad_comp(a, b);
}
ph_proto(, edata_avail_, edata_avail_t, edata_t)
ph_proto(, edata_heap_, edata_heap_t, edata_t)
ph_proto(, edata_age_heap_, edata_age_heap_t, edata_t);
#endif /* JEMALLOC_INTERNAL_EDATA_H */

View File

@@ -21,6 +21,8 @@ struct hpa_shard_s {
pai_t pai;
malloc_mutex_t grow_mtx;
malloc_mutex_t mtx;
/* The base metadata allocator. */
base_t *base;
/*
* This edata cache is the one we use when allocating a small extent
* from a pageslab. The pageslab itself comes from the centralized
@@ -45,7 +47,14 @@ struct hpa_shard_s {
*
* Guarded by grow_mtx.
*/
edata_list_inactive_t unused_slabs;
hpdata_list_t unused_slabs;
/*
* How many grow operations have occurred.
*
* Guarded by grow_mtx.
*/
uint64_t age_counter;
/*
* Either NULL (if empty), or some integer multiple of a
@@ -54,7 +63,8 @@ struct hpa_shard_s {
*
* Guarded by grow_mtx.
*/
edata_t *eden;
void *eden;
size_t eden_len;
/* The arena ind we're associated with. */
unsigned ind;
@@ -67,7 +77,7 @@ struct hpa_shard_s {
* just that it can function properly given the system it's running on.
*/
bool hpa_supported();
bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap,
bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base,
edata_cache_t *edata_cache, unsigned ind, size_t alloc_max);
void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src);

View File

@@ -0,0 +1,124 @@
#ifndef JEMALLOC_INTERNAL_HPDATA_H
#define JEMALLOC_INTERNAL_HPDATA_H
#include "jemalloc/internal/flat_bitmap.h"
#include "jemalloc/internal/ph.h"
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/typed_list.h"
/*
* The metadata representation we use for extents in hugepages. While the PAC
uses the edata_t to represent both active and inactive extents, the HPA only
* uses the edata_t for active ones; instead, inactive extent state is tracked
* within hpdata associated with the enclosing hugepage-sized, hugepage-aligned
* region of virtual address space.
*
* An hpdata need not be "truly" backed by a hugepage (which is not necessarily
* an observable property of any given region of address space). It's just
* hugepage-sized and hugepage-aligned; it's *potentially* huge.
*/
typedef struct hpdata_s hpdata_t;
struct hpdata_s {
	/*
	 * We likewise follow the edata convention of mangling names and forcing
	 * the use of accessors -- this lets us add some consistency checks on
	 * access.
	 */
	/*
	 * The address of the hugepage in question. This can't be named h_addr,
	 * since that conflicts with a macro defined in Windows headers.
	 */
	void *h_address;
	/* Its age (measured in psset operations). */
	uint64_t h_age;
	/* Whether or not we think the hugepage is mapped that way by the OS. */
	bool h_huge;
	/*
	 * Intrusive linkage; which member is live depends on where the hpdata
	 * currently sits (NOTE(review): the containing structure, not the
	 * hpdata itself, determines which member is valid -- confirm at use
	 * sites).
	 */
	union {
		/* When nonempty, used by the psset bins. */
		phn(hpdata_t) ph_link;
		/*
		 * When empty (or not corresponding to any hugepage), list
		 * linkage.
		 */
		ql_elm(hpdata_t) ql_link;
	};
	/* Number of currently free pages (regardless of contiguity). */
	size_t h_nfree;
	/* The length of the largest contiguous sequence of inactive pages. */
	size_t h_longest_free_range;
	/* A bitmap with bits set in the active pages. */
	fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
};
/* Reads back the base address of the hugepage this hpdata describes. */
static inline void *
hpdata_addr_get(const hpdata_t *hpdata) {
	void *base = hpdata->h_address;
	return base;
}
/* Records the hugepage's base address, which must be hugepage-aligned. */
static inline void
hpdata_addr_set(hpdata_t *hpdata, void *addr) {
	/* Misaligned addresses would break the ADDR2BASE round-trip. */
	assert(HUGEPAGE_ADDR2BASE(addr) == addr);
	hpdata->h_address = addr;
}
/* Reads back the hpdata's age (in psset operations). */
static inline uint64_t
hpdata_age_get(const hpdata_t *hpdata) {
	uint64_t age = hpdata->h_age;
	return age;
}
/* Stores the hpdata's age (in psset operations). */
static inline void
hpdata_age_set(hpdata_t *hpdata, uint64_t age) {
	hpdata->h_age = age;
}
/* Reads back whether we believe the OS backs this region with a hugepage. */
static inline bool
hpdata_huge_get(const hpdata_t *hpdata) {
	bool is_huge = hpdata->h_huge;
	return is_huge;
}
/* Records whether we believe the OS backs this region with a hugepage. */
static inline void
hpdata_huge_set(hpdata_t *hpdata, bool huge) {
	hpdata->h_huge = huge;
}
/* Reads back the count of free pages (contiguous or not). */
static inline size_t
hpdata_nfree_get(const hpdata_t *hpdata) {
	size_t nfree = hpdata->h_nfree;
	return nfree;
}
/* Stores the count of free pages; can't exceed the hugepage's capacity. */
static inline void
hpdata_nfree_set(hpdata_t *hpdata, size_t nfree) {
	assert(nfree <= HUGEPAGE_PAGES);
	hpdata->h_nfree = nfree;
}
/* Reads back the length of the longest run of contiguous free pages. */
static inline size_t
hpdata_longest_free_range_get(const hpdata_t *hpdata) {
	size_t run_len = hpdata->h_longest_free_range;
	return run_len;
}
/*
 * Stores the length of the longest contiguous free run; bounded by the
 * hugepage's total page count.
 */
static inline void
hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) {
	assert(longest_free_range <= HUGEPAGE_PAGES);
	hpdata->h_longest_free_range = longest_free_range;
}
/*
 * Puts an hpdata into its initial state: describing the hugepage at addr,
 * with the given age, not (yet) hugeified, and completely free -- every page
 * is inactive, so the longest free run spans the whole hugepage.
 */
static inline void
hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
	hpdata_addr_set(hpdata, addr);
	hpdata_age_set(hpdata, age);
	/* Nothing is active yet, so all pages are free and contiguous. */
	hpdata_nfree_set(hpdata, HUGEPAGE_PAGES);
	hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES);
	hpdata_huge_set(hpdata, false);
	/* Clear the active-page bitmap to match the all-free accounting. */
	fb_init(hpdata->active_pages, HUGEPAGE_PAGES);
}
TYPED_LIST(hpdata_list, hpdata_t, ql_link)
typedef ph(hpdata_t) hpdata_age_heap_t;
ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t);
#endif /* JEMALLOC_INTERNAL_HPDATA_H */

View File

@@ -17,6 +17,20 @@
/* Huge page size. LG_HUGEPAGE is determined by the configure script. */
#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE))
#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1))
#if LG_HUGEPAGE != 0
# define HUGEPAGE_PAGES (HUGEPAGE / PAGE)
#else
/*
* It's convenient to define arrays (or bitmaps) of HUGEPAGE_PAGES lengths. If
* we can't autodetect the hugepage size, it gets treated as 0, in which case
* we'll trigger a compiler error in those arrays. Avoid this case by ensuring
* that this value is at least 1. (We won't ever run in this degraded state;
hpa_supported() returns false in this case.)
*/
# define HUGEPAGE_PAGES 1
#endif
/* Return the huge page base address for the huge page containing address a. */
#define HUGEPAGE_ADDR2BASE(a) \
((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK))

View File

@@ -1,6 +1,8 @@
#ifndef JEMALLOC_INTERNAL_PSSET_H
#define JEMALLOC_INTERNAL_PSSET_H
#include "jemalloc/internal/hpdata.h"
/*
* A page-slab set. What the eset is to PAC, the psset is to HPA. It maintains
* a collection of page-slabs (the intent being that they are backed by
@@ -51,21 +53,18 @@ struct psset_s {
* The pageslabs, quantized by the size class of the largest contiguous
* free run of pages in a pageslab.
*/
edata_age_heap_t pageslabs[PSSET_NPSIZES];
hpdata_age_heap_t pageslabs[PSSET_NPSIZES];
bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)];
psset_stats_t stats;
/* How many alloc_new calls have happened? */
uint64_t age_counter;
};
void psset_init(psset_t *psset);
void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src);
void psset_insert(psset_t *psset, edata_t *ps);
void psset_remove(psset_t *psset, edata_t *ps);
void psset_insert(psset_t *psset, hpdata_t *ps);
void psset_remove(psset_t *psset, hpdata_t *ps);
void psset_hugify(psset_t *psset, edata_t *ps);
void psset_hugify(psset_t *psset, hpdata_t *ps);
/*
* Tries to obtain a chunk from an existing pageslab already in the set.
@@ -78,7 +77,7 @@ bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size);
* to the psset and allocate an extent from within it. The passed-in pageslab
* must be at least as big as size.
*/
void psset_alloc_new(psset_t *psset, edata_t *ps,
void psset_alloc_new(psset_t *psset, hpdata_t *ps,
edata_t *r_edata, size_t size);
/*
@@ -89,6 +88,6 @@ void psset_alloc_new(psset_t *psset, edata_t *ps,
* result must be checked and deallocated to the central HPA. Otherwise returns
* NULL.
*/
edata_t *psset_dalloc(psset_t *psset, edata_t *edata);
hpdata_t *psset_dalloc(psset_t *psset, edata_t *edata);
#endif /* JEMALLOC_INTERNAL_PSSET_H */